diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52364b0e92698cf48671e2f03bed82c10ef14804 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+best.th filter=lfs diff=lfs merge=lfs -text
diff --git a/best.th b/best.th
new file mode 100644
index 0000000000000000000000000000000000000000..3b7ae8a69bc4faa4bc00b4e3b98e802af25ee256
--- /dev/null
+++ b/best.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9018ae76c648daff6f3de5d7cb1870fca81537c9024df37e5f77001de0249869
+size 1169366338
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7db64d1c3a923223751a9aba9aa5bb63d5b6eb62
--- /dev/null
+++ b/config.json
@@ -0,0 +1,109 @@
+{
+    "dataset_reader": {
+        "type": "s2s_manual_reader",
+        "source_token_indexer": {
+            "tokens": {
+                "type": "pretrained_transformer",
+                "do_lowercase": false,
+                "model_name": "./roberta"
+            }
+        },
+        "target_token_indexer": {
+            "tokens": {
+                "type": "single_id"
+            }
+        },
+        "tokenizer": {
+            "word_splitter": {
+                "type": "just_spaces"
+            }
+        }
+    },
+    "iterator": {
+        "type": "basic",
+        "batch_size": 32
+    },
+    "model": {
+        "type": "geo_s2s",
+        "beam_size": 10,
+        "encoder": {
+            "dropout": 0.5,
+            "emb_dim": 768,
+            "hid_dim": 512,
+            "input_dim": 21128
+        },
+        "knowledge_points_ratio": 0,
+        "max_decoding_steps": 16,
+        "resnet_pretrained": "./",
+        "scheduled_sampling_ratio": 0,
+        "source_embedder": {
+            "token_embedders": {}
+        },
+        "target_embedding_dim": 512
+    },
+    "train_data_path": "./GeoQA-Data/GeoQA-Pro/pro_train.pk",
+    "validation_data_path": "./GeoQA-Data/GeoQA-Pro/pro_dev.pk",
+    "test_data_path": "./GeoQA-Data/GeoQA-Pro/pro_test.pk",
+    "trainer": {
+        "cuda_device": 0,
+        "grad_norm": 10,
+        "learning_rate_scheduler": {
+            "type": "reduce_on_plateau",
+            "factor": 0.6,
+            "mode": "max",
+            "patience": 5
+        },
+        "num_epochs": 100,
+        "optimizer": {
+            "type": "adam",
+            "lr": 0.001,
+            "parameter_groups": [
+                [
+                    [
+                        "mcan",
+                        "merge_att",
+                        "channel_transform",
+                        "attflat_img",
+                        "attflat_lang",
+                        "decode_transform"
+                    ],
+                    {
+                        "lr": 1e-05
+                    }
+                ],
+                [
+                    [
+                        "resnet"
+                    ],
+                    {
+                        "lr": 1e-05
+                    }
+                ],
+                [
+                    [
+                        "source_embedder",
+                        "encoder.embedding"
+                    ],
+                    {
+                        "lr": 2e-05
+                    }
+                ],
+                [
+                    [
+                        "encoder.concat_trans",
+                        "encoder.concat_trans_",
+                        "encoder.lstm_embedding",
+                        "encoder.trans",
+                        "encoder.norm",
+                        "encoder.concat_norm"
+                    ],
+                    {
+                        "lr": 0.001
+                    }
+                ]
+            ]
+        },
+        "validation_metric": "+acc"
+    },
+    "evaluate_on_test": true
+}
\ No newline at end of file
diff --git a/log/train/events.out.tfevents.1742793704.amax b/log/train/events.out.tfevents.1742793704.amax
new file mode 100644
index 0000000000000000000000000000000000000000..2fb0d005fca123cb78d6ac07c9cf7b4497d6c24d
--- /dev/null
+++ b/log/train/events.out.tfevents.1742793704.amax
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1563426608092d5cb56619581eb9df3a627fdca704c262ae175dcb876795bbf0
+size 31502604
diff --git a/log/validation/events.out.tfevents.1742793704.amax b/log/validation/events.out.tfevents.1742793704.amax
new file mode 100644
index 0000000000000000000000000000000000000000..1d11d6c6f390fec9d7b05e177ce6c01690e00fbf
--- /dev/null
+++ b/log/validation/events.out.tfevents.1742793704.amax
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:490c22130c5c3b3a371839fb3d576de6f3a6d134ad253c8feb5bc24a277133eb
+size 17240
diff --git a/metrics.json b/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..7ba33a5245790f0d49ea5dc46238bf3ce3182d92
--- /dev/null
+++ b/metrics.json
@@ -0,0 +1,38 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:45:01.064397",
+  "training_start_epoch": 0,
+  "training_epochs": 99,
+  "epoch": 99,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.013839077864858237,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7266542542114865,
+  "validation_acc": 0.6417824074074074,
+  "validation_no_result": 0.11848958333333333,
+  "validation_loss": 1.416664329667886,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262,
+  "test_BLEU": 0.704433949149771,
+  "test_acc": 0.6443865740740741,
+  "test_no_result": 0.11559606481481481,
+  "test_loss": 1.2252581814924877
+}
\ No newline at end of file
diff --git a/metrics_epoch_0.json b/metrics_epoch_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..6be12a07115514826be628c7c5127417cc1119f1
--- /dev/null
+++ b/metrics_epoch_0.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 0,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 1974,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:01:29.098765",
+  "training_start_epoch": 0,
+  "training_epochs": 0,
+  "epoch": 0,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 1.6868594841523603,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 1974,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.23317665829903492,
+  "validation_acc": 0.36675347222222227,
+  "validation_no_result": 0.19661458333333334,
+  "validation_loss": 1.2027941942214966,
+  "best_validation_BLEU": 0.23317665829903492,
+  "best_validation_acc": 0.36675347222222227,
+  "best_validation_no_result": 0.19661458333333334,
+  "best_validation_loss": 1.2027941942214966
+}
\ No newline at end of file
diff --git a/metrics_epoch_1.json b/metrics_epoch_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..584e458b0eb0dd2019fa5b1a1a2352e13412f762
--- /dev/null
+++ b/metrics_epoch_1.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 1,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17628,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:02:54.787197",
+  "training_start_epoch": 0,
+  "training_epochs": 1,
+  "epoch": 1,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 1.1222382789308374,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17628,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.35292654895689146,
+  "validation_acc": 0.42476851851851855,
+  "validation_no_result": 0.16666666666666666,
+  "validation_loss": 1.0453250408172607,
+  "best_validation_BLEU": 0.35292654895689146,
+  "best_validation_acc": 0.42476851851851855,
+  "best_validation_no_result": 0.16666666666666666,
+  "best_validation_loss": 1.0453250408172607
+}
\ No newline at end of file
diff --git a/metrics_epoch_10.json b/metrics_epoch_10.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f8ff654d183ffb2019bcbf027eb7377a7d05fe2
--- /dev/null
+++ b/metrics_epoch_10.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 10,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:15:48.319747",
+  "training_start_epoch": 0,
+  "training_epochs": 10,
+  "epoch": 10,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.6800853114236485,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.43079156908061556,
+  "validation_acc": 0.5237268518518519,
+  "validation_no_result": 0.1205150462962963,
+  "validation_loss": 0.7902998005350431,
+  "best_validation_BLEU": 0.43079156908061556,
+  "best_validation_acc": 0.5237268518518519,
+  "best_validation_no_result": 0.1205150462962963,
+  "best_validation_loss": 0.7902998005350431
+}
\ No newline at end of file
diff --git a/metrics_epoch_11.json b/metrics_epoch_11.json
new file mode 100644
index 0000000000000000000000000000000000000000..175dca4dd64a9ecd1b6066a64f9de0d58b2d411c
--- /dev/null
+++ b/metrics_epoch_11.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 10,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:17:16.634014",
+  "training_start_epoch": 0,
+  "training_epochs": 11,
+  "epoch": 11,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.6558576405048371,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.407816076308465,
+  "validation_acc": 0.4982638888888889,
+  "validation_no_result": 0.11791087962962964,
+  "validation_loss": 0.7638036062320074,
+  "best_validation_BLEU": 0.43079156908061556,
+  "best_validation_acc": 0.5237268518518519,
+  "best_validation_no_result": 0.1205150462962963,
+  "best_validation_loss": 0.7902998005350431
+}
\ No newline at end of file
diff --git a/metrics_epoch_12.json b/metrics_epoch_12.json
new file mode 100644
index 0000000000000000000000000000000000000000..b170da79384ce5f14aafc90fc6abfedb60d63952
--- /dev/null
+++ b/metrics_epoch_12.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 10,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:18:44.602728",
+  "training_start_epoch": 0,
+  "training_epochs": 12,
+  "epoch": 12,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.631023341959173,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.4312758899332716,
+  "validation_acc": 0.5112847222222222,
+  "validation_no_result": 0.1127025462962963,
+  "validation_loss": 0.766788254181544,
+  "best_validation_BLEU": 0.43079156908061556,
+  "best_validation_acc": 0.5237268518518519,
+  "best_validation_no_result": 0.1205150462962963,
+  "best_validation_loss": 0.7902998005350431
+}
\ No newline at end of file
diff --git a/metrics_epoch_13.json b/metrics_epoch_13.json
new file mode 100644
index 0000000000000000000000000000000000000000..354323d9a7b342c018c865bb8ed96277c613252b
--- /dev/null
+++ b/metrics_epoch_13.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 13,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:20:12.304651",
+  "training_start_epoch": 0,
+  "training_epochs": 13,
+  "epoch": 13,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.590884352272207,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5037565205317504,
+  "validation_acc": 0.5601851851851852,
+  "validation_no_result": 0.09577546296296297,
+  "validation_loss": 0.7520468930403391,
+  "best_validation_BLEU": 0.5037565205317504,
+  "best_validation_acc": 0.5601851851851852,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7520468930403391
+}
\ No newline at end of file
diff --git a/metrics_epoch_14.json b/metrics_epoch_14.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8f1b1744494fbe4167173b91b33d668210b9e55
--- /dev/null
+++ b/metrics_epoch_14.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 14,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:21:39.478923",
+  "training_start_epoch": 0,
+  "training_epochs": 14,
+  "epoch": 14,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.5659671618179841,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5427631598053875,
+  "validation_acc": 0.5784143518518519,
+  "validation_no_result": 0.09577546296296297,
+  "validation_loss": 0.7350192765394846,
+  "best_validation_BLEU": 0.5427631598053875,
+  "best_validation_acc": 0.5784143518518519,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7350192765394846
+}
\ No newline at end of file
diff --git a/metrics_epoch_15.json b/metrics_epoch_15.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a28ee29aac48169191967a7de8c9696fa397863
--- /dev/null
+++ b/metrics_epoch_15.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 14,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:23:06.755342",
+  "training_start_epoch": 0,
+  "training_epochs": 15,
+  "epoch": 15,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.5331384379755367,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5071943127904087,
+  "validation_acc": 0.5536747685185185,
+  "validation_no_result": 0.10228587962962964,
+  "validation_loss": 0.7253126613795757,
+  "best_validation_BLEU": 0.5427631598053875,
+  "best_validation_acc": 0.5784143518518519,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7350192765394846
+}
\ No newline at end of file
diff --git a/metrics_epoch_16.json b/metrics_epoch_16.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c567cacaff57e650b5cc21790b1c9377ba686d4
--- /dev/null
+++ b/metrics_epoch_16.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 14,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:24:34.104787",
+  "training_start_epoch": 0,
+  "training_epochs": 16,
+  "epoch": 16,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.509379303726283,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5108672141994016,
+  "validation_acc": 0.5536747685185185,
+  "validation_no_result": 0.109375,
+  "validation_loss": 0.7373132693270842,
+  "best_validation_BLEU": 0.5427631598053875,
+  "best_validation_acc": 0.5784143518518519,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7350192765394846
+}
\ No newline at end of file
diff --git a/metrics_epoch_17.json b/metrics_epoch_17.json
new file mode 100644
index 0000000000000000000000000000000000000000..12997471217567dcbaa387e077b6049ec2118392
--- /dev/null
+++ b/metrics_epoch_17.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 14,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:26:00.758793",
+  "training_start_epoch": 0,
+  "training_epochs": 17,
+  "epoch": 17,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.485344271497293,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5599993581570306,
+  "validation_acc": 0.5706018518518519,
+  "validation_no_result": 0.09186921296296297,
+  "validation_loss": 0.7641393554707369,
+  "best_validation_BLEU": 0.5427631598053875,
+  "best_validation_acc": 0.5784143518518519,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7350192765394846
+}
\ No newline at end of file
diff --git a/metrics_epoch_18.json b/metrics_epoch_18.json
new file mode 100644
index 0000000000000000000000000000000000000000..c6c02efe3316ebc09e67706419d0136a8dd49ca7
--- /dev/null
+++ b/metrics_epoch_18.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 14,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:27:31.038986",
+  "training_start_epoch": 0,
+  "training_epochs": 18,
+  "epoch": 18,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.46907018016685137,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5504686593347489,
+  "validation_acc": 0.5653935185185185,
+  "validation_no_result": 0.12181712962962964,
+  "validation_loss": 0.772665457179149,
+  "best_validation_BLEU": 0.5427631598053875,
+  "best_validation_acc": 0.5784143518518519,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7350192765394846
+}
\ No newline at end of file
diff --git a/metrics_epoch_19.json b/metrics_epoch_19.json
new file mode 100644
index 0000000000000000000000000000000000000000..0281c2b88bf5ba412f6caaf80ac5be25767a1676
--- /dev/null
+++ b/metrics_epoch_19.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 14,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:28:55.198844",
+  "training_start_epoch": 0,
+  "training_epochs": 19,
+  "epoch": 19,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.44485945268110794,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5366820134380189,
+  "validation_acc": 0.5679976851851852,
+  "validation_no_result": 0.09447337962962964,
+  "validation_loss": 0.7757748365402222,
+  "best_validation_BLEU": 0.5427631598053875,
+  "best_validation_acc": 0.5784143518518519,
+  "best_validation_no_result": 0.09577546296296297,
+  "best_validation_loss": 0.7350192765394846
+}
\ No newline at end of file
diff --git a/metrics_epoch_2.json b/metrics_epoch_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..e0f270c5311e90932c2dee20c56f639d53e2687d
--- /dev/null
+++ b/metrics_epoch_2.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 1,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:04:23.165231",
+  "training_start_epoch": 0,
+  "training_epochs": 2,
+  "epoch": 2,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 1.0030749413100155,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.28082488387199267,
+  "validation_acc": 0.41753472222222227,
+  "validation_no_result": 0.17447916666666666,
+  "validation_loss": 0.9994873702526093,
+  "best_validation_BLEU": 0.35292654895689146,
+  "best_validation_acc": 0.42476851851851855,
+  "best_validation_no_result": 0.16666666666666666,
+  "best_validation_loss": 1.0453250408172607
+}
\ No newline at end of file
diff --git a/metrics_epoch_20.json b/metrics_epoch_20.json
new file mode 100644
index 0000000000000000000000000000000000000000..69d4c7f9e87961cfa225e4a2b890e622ed6559a8
--- /dev/null
+++ b/metrics_epoch_20.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 20,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:30:24.957092",
+  "training_start_epoch": 0,
+  "training_epochs": 20,
+  "epoch": 20,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.4409686741503802,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5588327729022262,
+  "validation_acc": 0.5817418981481481,
+  "validation_no_result": 0.10358796296296297,
+  "validation_loss": 0.7639288852612177,
+  "best_validation_BLEU": 0.5588327729022262,
+  "best_validation_acc": 0.5817418981481481,
+  "best_validation_no_result": 0.10358796296296297,
+  "best_validation_loss": 0.7639288852612177
+}
\ No newline at end of file
diff --git a/metrics_epoch_21.json b/metrics_epoch_21.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf219935569453e770cbfc8d340849978e6aa6d1
--- /dev/null
+++ b/metrics_epoch_21.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 21,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:31:50.125831",
+  "training_start_epoch": 0,
+  "training_epochs": 21,
+  "epoch": 21,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.4123601945963773,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6021588888536067,
+  "validation_acc": 0.5875289351851852,
+  "validation_no_result": 0.10228587962962964,
+  "validation_loss": 0.77462221433719,
+  "best_validation_BLEU": 0.6021588888536067,
+  "best_validation_acc": 0.5875289351851852,
+  "best_validation_no_result": 0.10228587962962964,
+  "best_validation_loss": 0.77462221433719
+}
\ No newline at end of file
diff --git a/metrics_epoch_22.json b/metrics_epoch_22.json
new file mode 100644
index 0000000000000000000000000000000000000000..87b80003a4e15eb2442553dfdb4cbdc4683515a0
--- /dev/null
+++ b/metrics_epoch_22.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 22,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:33:17.006857",
+  "training_start_epoch": 0,
+  "training_epochs": 22,
+  "epoch": 22,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.38905653899366205,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6125124234113125,
+  "validation_acc": 0.5992476851851852,
+  "validation_no_result": 0.0970775462962963,
+  "validation_loss": 0.7628330377240976,
+  "best_validation_BLEU": 0.6125124234113125,
+  "best_validation_acc": 0.5992476851851852,
+  "best_validation_no_result": 0.0970775462962963,
+  "best_validation_loss": 0.7628330377240976
+}
\ No newline at end of file
diff --git a/metrics_epoch_23.json b/metrics_epoch_23.json
new file mode 100644
index 0000000000000000000000000000000000000000..3b87dceed41fc94b0ed1d50f8a867244fd9daae2
--- /dev/null
+++ b/metrics_epoch_23.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 22,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:34:40.839620",
+  "training_start_epoch": 0,
+  "training_epochs": 23,
+  "epoch": 23,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.3673159715804187,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6080388160655757,
+  "validation_acc": 0.5986689814814815,
+  "validation_no_result": 0.1166087962962963,
+  "validation_loss": 0.8231359881659349,
+  "best_validation_BLEU": 0.6125124234113125,
+  "best_validation_acc": 0.5992476851851852,
+  "best_validation_no_result": 0.0970775462962963,
+  "best_validation_loss": 0.7628330377240976
+}
\ No newline at end of file
diff --git a/metrics_epoch_24.json b/metrics_epoch_24.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbc16c584831de3cb25ece4c6150dddd2a805ebe
--- /dev/null
+++ b/metrics_epoch_24.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 22,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:36:03.591584",
+  "training_start_epoch": 0,
+  "training_epochs": 24,
+  "epoch": 24,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.3589384977113117,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6044499896265613,
+  "validation_acc": 0.5985243055555556,
+  "validation_no_result": 0.10951967592592593,
+  "validation_loss": 0.8028569320837656,
+  "best_validation_BLEU": 0.6125124234113125,
+  "best_validation_acc": 0.5992476851851852,
+  "best_validation_no_result": 0.0970775462962963,
+  "best_validation_loss": 0.7628330377240976
+}
\ No newline at end of file
diff --git a/metrics_epoch_25.json b/metrics_epoch_25.json
new file mode 100644
index 0000000000000000000000000000000000000000..b444d745410db4b71e8412231c12e700cee8837f
--- /dev/null
+++ b/metrics_epoch_25.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 25,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:37:26.154131",
+  "training_start_epoch": 0,
+  "training_epochs": 25,
+  "epoch": 25,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.34136158214374024,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.5987859616867737,
+  "validation_acc": 0.6005497685185185,
+  "validation_no_result": 0.11863425925925926,
+  "validation_loss": 0.8074321200450262,
+  "best_validation_BLEU": 0.5987859616867737,
+  "best_validation_acc": 0.6005497685185185,
+  "best_validation_no_result": 0.11863425925925926,
+  "best_validation_loss": 0.8074321200450262
+}
\ No newline at end of file
diff --git a/metrics_epoch_26.json b/metrics_epoch_26.json
new file mode 100644
index 0000000000000000000000000000000000000000..504be34b77388dca5a48b3177ecd0c29df99555a
--- /dev/null
+++ b/metrics_epoch_26.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 25,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:38:48.554449",
+  "training_start_epoch": 0,
+  "training_epochs": 26,
+  "epoch": 26,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.3194368757984855,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6175642128698793,
+  "validation_acc": 0.5966435185185185,
+  "validation_no_result": 0.11082175925925926,
+  "validation_loss": 0.8277938465277354,
+  "best_validation_BLEU": 0.5987859616867737,
+  "best_validation_acc": 0.6005497685185185,
+  "best_validation_no_result": 0.11863425925925926,
+  "best_validation_loss": 0.8074321200450262
+}
\ No newline at end of file
diff --git a/metrics_epoch_27.json b/metrics_epoch_27.json
new file mode 100644
index 0000000000000000000000000000000000000000..94d86b318c281a1ea06f0aff9a35844eb835a3b2
--- /dev/null
+++ b/metrics_epoch_27.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 27,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:40:11.798522",
+  "training_start_epoch": 0,
+  "training_epochs": 27,
+  "epoch": 27,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.30391626872799615,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6246666258278749,
+  "validation_acc": 0.6031539351851852,
+  "validation_no_result": 0.12514467592592593,
+  "validation_loss": 0.8511058837175369,
+  "best_validation_BLEU": 0.6246666258278749,
+  "best_validation_acc": 0.6031539351851852,
+  "best_validation_no_result": 0.12514467592592593,
+  "best_validation_loss": 0.8511058837175369
+}
\ No newline at end of file
diff --git a/metrics_epoch_28.json b/metrics_epoch_28.json
new file mode 100644
index 0000000000000000000000000000000000000000..951878495cae78b0058e8c4d4caf13630ec461fc
--- /dev/null
+++ b/metrics_epoch_28.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 27,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:41:35.626352",
+  "training_start_epoch": 0,
+  "training_epochs": 28,
+  "epoch": 28,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.29144989455288106,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.648115855550148,
+  "validation_acc": 0.6018518518518519,
+  "validation_no_result": 0.1087962962962963,
+  "validation_loss": 0.8601374352971712,
+  "best_validation_BLEU": 0.6246666258278749,
+  "best_validation_acc": 0.6031539351851852,
+  "best_validation_no_result": 0.12514467592592593,
+  "best_validation_loss": 0.8511058837175369
+}
\ No newline at end of file
diff --git a/metrics_epoch_29.json b/metrics_epoch_29.json
new file mode 100644
index 0000000000000000000000000000000000000000..2adc13597a324ef933c59293850f606300c91b60
--- /dev/null
+++ b/metrics_epoch_29.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 29,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:42:59.318964",
+  "training_start_epoch": 0,
+  "training_epochs": 29,
+  "epoch": 29,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.26861848641525615,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6362762956373789,
+  "validation_acc": 0.6129918981481481,
+  "validation_no_result": 0.11400462962962964,
+  "validation_loss": 0.8515521312753359,
+  "best_validation_BLEU": 0.6362762956373789,
+  "best_validation_acc": 0.6129918981481481,
+  "best_validation_no_result": 0.11400462962962964,
+  "best_validation_loss": 0.8515521312753359
+}
\ No newline at end of file
diff --git a/metrics_epoch_3.json b/metrics_epoch_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..3974057b06fc5e141cb099530315ae13113ae65f
--- /dev/null
+++ b/metrics_epoch_3.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 1,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:05:51.190753",
+  "training_start_epoch": 0,
+  "training_epochs": 3,
+  "epoch": 3,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.9417703888633034,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.3121575177212757,
+  "validation_acc": 0.41681134259259256,
+  "validation_no_result": 0.17317708333333334,
+  "validation_loss": 0.9283205419778824,
+  "best_validation_BLEU": 0.35292654895689146,
+  "best_validation_acc": 0.42476851851851855,
+  "best_validation_no_result": 0.16666666666666666,
+  "best_validation_loss": 1.0453250408172607
+}
\ No newline at end of file
diff --git a/metrics_epoch_30.json b/metrics_epoch_30.json
new file mode 100644
index 0000000000000000000000000000000000000000..c21f938414686c7f22bef218c4c7722688671874
--- /dev/null
+++ b/metrics_epoch_30.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 29,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:44:36.049061",
+  "training_start_epoch": 0,
+  "training_epochs": 30,
+  "epoch": 30,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.25431940298188815,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6410739070257515,
+  "validation_acc": 0.5907118055555556,
+  "validation_no_result": 0.1043113425925926,
+  "validation_loss": 0.89767703662316,
+  "best_validation_BLEU": 0.6362762956373789,
+  "best_validation_acc": 0.6129918981481481,
+  "best_validation_no_result": 0.11400462962962964,
+  "best_validation_loss": 0.8515521312753359
+}
\ No newline at end of file
diff --git a/metrics_epoch_31.json b/metrics_epoch_31.json
new file mode 100644
index 0000000000000000000000000000000000000000..eb886075633242cb78d508601db186714c3ffa2f
--- /dev/null
+++ b/metrics_epoch_31.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 29,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:45:58.919362",
+  "training_start_epoch": 0,
+  "training_epochs": 31,
+  "epoch": 31,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.2436353249983354,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6502029305796647,
+  "validation_acc": 0.6038773148148148,
+  "validation_no_result": 0.11921296296296297,
+  "validation_loss": 0.8545631468296051,
+  "best_validation_BLEU": 0.6362762956373789,
+  "best_validation_acc": 0.6129918981481481,
+  "best_validation_no_result": 0.11400462962962964,
+  "best_validation_loss": 0.8515521312753359
+}
\ No newline at end of file
diff --git a/metrics_epoch_32.json b/metrics_epoch_32.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9d46916dc92ca524517247a96e3cb6c77460893
--- /dev/null
+++ b/metrics_epoch_32.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 29,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:47:27.298834",
+  "training_start_epoch": 0,
+  "training_epochs": 32,
+  "epoch": 32,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.24082526537505064,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6210247267996586,
+  "validation_acc": 0.5901331018518519,
+  "validation_no_result": 0.11921296296296297,
+  "validation_loss": 0.9147681097189585,
+  "best_validation_BLEU": 0.6362762956373789,
+  "best_validation_acc": 0.6129918981481481,
+  "best_validation_no_result": 0.11400462962962964,
+  "best_validation_loss": 0.8515521312753359
+}
\ No newline at end of file
diff --git a/metrics_epoch_33.json b/metrics_epoch_33.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a40cb1321edc0e78527214b77f808de2c863684
--- /dev/null
+++ b/metrics_epoch_33.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 29,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:48:57.872746",
+  "training_start_epoch": 0,
+  "training_epochs": 33,
+  "epoch": 33,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.22514350820671428,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6454995678751785,
+  "validation_acc": 0.5941840277777778,
+  "validation_no_result": 0.1171875,
+  "validation_loss": 0.9150293692946434,
+  "best_validation_BLEU": 0.6362762956373789,
+  "best_validation_acc": 0.6129918981481481,
+  "best_validation_no_result": 0.11400462962962964,
+  "best_validation_loss": 0.8515521312753359
+}
\ No newline at end of file
diff --git a/metrics_epoch_34.json b/metrics_epoch_34.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd87d98412e38b872c64b03b21f73fda4ea0ae0b
--- /dev/null
+++ b/metrics_epoch_34.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 29,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:50:26.974149",
+  "training_start_epoch": 0,
+  "training_epochs": 34,
+  "epoch": 34,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.20833875049244274,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6765339411713064,
+  "validation_acc": 0.5921585648148148,
+  "validation_no_result": 0.11400462962962964,
+  "validation_loss": 0.9229913602272669,
+  "best_validation_BLEU": 0.6362762956373789,
+  "best_validation_acc": 0.6129918981481481,
+  "best_validation_no_result": 0.11400462962962964,
+  "best_validation_loss": 0.8515521312753359
+}
\ No newline at end of file
diff --git a/metrics_epoch_35.json b/metrics_epoch_35.json
new file mode 100644
index 0000000000000000000000000000000000000000..45639dc05dbfe85da1746675f70b5d9c1eadc242
--- /dev/null
+++ b/metrics_epoch_35.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 35,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:51:51.108377",
+  "training_start_epoch": 0,
+  "training_epochs": 35,
+  "epoch": 35,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.1996874829584902,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6651120785884496,
+  "validation_acc": 0.6176215277777778,
+  "validation_no_result": 0.09635416666666667,
+  "validation_loss": 0.9561783224344254,
+  "best_validation_BLEU": 0.6651120785884496,
+  "best_validation_acc": 0.6176215277777778,
+  "best_validation_no_result": 0.09635416666666667,
+  "best_validation_loss": 0.9561783224344254
+}
\ No newline at end of file
diff --git a/metrics_epoch_36.json b/metrics_epoch_36.json
new file mode 100644
index 0000000000000000000000000000000000000000..16812b9ca985694d7b0449dd6aea6d4449d51aa3
--- /dev/null
+++ b/metrics_epoch_36.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 36,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:53:37.984899",
+  "training_start_epoch": 0,
+  "training_epochs": 36,
+  "epoch": 36,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.19211301336234266,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6725576195507628,
+  "validation_acc": 0.6215277777777778,
+  "validation_no_result": 0.10677083333333333,
+  "validation_loss": 0.9550605937838554,
+  "best_validation_BLEU": 0.6725576195507628,
+  "best_validation_acc": 0.6215277777777778,
+  "best_validation_no_result": 0.10677083333333333,
+  "best_validation_loss": 0.9550605937838554
+}
\ No newline at end of file
diff --git a/metrics_epoch_37.json b/metrics_epoch_37.json
new file mode 100644
index 0000000000000000000000000000000000000000..52cb8b6ad3abc2700fcabc16be3948374975808d
--- /dev/null
+++ b/metrics_epoch_37.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:55:23.433659",
+  "training_start_epoch": 0,
+  "training_epochs": 37,
+  "epoch": 37,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.17771621271967888,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6769016467773605,
+  "validation_acc": 0.6247106481481481,
+  "validation_no_result": 0.1087962962962963,
+  "validation_loss": 0.9989128684004148,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_38.json b/metrics_epoch_38.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c8053505e2d1f1ffe59e0d8e79da55082279127
--- /dev/null
+++ b/metrics_epoch_38.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:57:10.320601",
+  "training_start_epoch": 0,
+  "training_epochs": 38,
+  "epoch": 38,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.1800003968179226,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6738853719385639,
+  "validation_acc": 0.6208043981481481,
+  "validation_no_result": 0.1166087962962963,
+  "validation_loss": 0.9844465777277946,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_39.json b/metrics_epoch_39.json
new file mode 100644
index 0000000000000000000000000000000000000000..e2b3d7a00a09205640d59b47d308d9139c0d2c38
--- /dev/null
+++ b/metrics_epoch_39.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:58:47.382407",
+  "training_start_epoch": 0,
+  "training_epochs": 39,
+  "epoch": 39,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.17429773367264054,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6865457621040599,
+  "validation_acc": 0.5914351851851852,
+  "validation_no_result": 0.1205150462962963,
+  "validation_loss": 1.013327990969022,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_4.json b/metrics_epoch_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d598cfe944965cce64dbe1cc99e3ef926d50b2a
--- /dev/null
+++ b/metrics_epoch_4.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 4,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:07:20.149041",
+  "training_start_epoch": 0,
+  "training_epochs": 4,
+  "epoch": 4,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.895405513048172,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.32383508127737887,
+  "validation_acc": 0.43171296296296297,
+  "validation_no_result": 0.1556712962962963,
+  "validation_loss": 0.9082479352752367,
+  "best_validation_BLEU": 0.32383508127737887,
+  "best_validation_acc": 0.43171296296296297,
+  "best_validation_no_result": 0.1556712962962963,
+  "best_validation_loss": 0.9082479352752367
+}
\ No newline at end of file
diff --git a/metrics_epoch_40.json b/metrics_epoch_40.json
new file mode 100644
index 0000000000000000000000000000000000000000..cc626604b5de2a45d66e0465e6007f511c4166f4
--- /dev/null
+++ b/metrics_epoch_40.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:00:11.059935",
+  "training_start_epoch": 0,
+  "training_epochs": 40,
+  "epoch": 40,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.17112242193384605,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6821643018761171,
+  "validation_acc": 0.6170428240740741,
+  "validation_no_result": 0.11588541666666667,
+  "validation_loss": 1.03222210953633,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_41.json b/metrics_epoch_41.json
new file mode 100644
index 0000000000000000000000000000000000000000..54d3d1c1b2eda36dfadf70d4ec979f86c31887fd
--- /dev/null
+++ b/metrics_epoch_41.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:01:44.799608",
+  "training_start_epoch": 0,
+  "training_epochs": 41,
+  "epoch": 41,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.1595429590479894,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6684891541351873,
+  "validation_acc": 0.6077835648148148,
+  "validation_no_result": 0.12239583333333333,
+  "validation_loss": 1.0189014325539272,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_42.json b/metrics_epoch_42.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba07ccd1382c968334d0f59d863b854555ae067e
--- /dev/null
+++ b/metrics_epoch_42.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:03:09.898860",
+  "training_start_epoch": 0,
+  "training_epochs": 42,
+  "epoch": 42,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.15222934606400404,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6837890169037233,
+  "validation_acc": 0.6247106481481481,
+  "validation_no_result": 0.1087962962962963,
+  "validation_loss": 1.038762167096138,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_43.json b/metrics_epoch_43.json
new file mode 100644
index 0000000000000000000000000000000000000000..0983e381f26a227505bb255e14e5fca9a767be14
--- /dev/null
+++ b/metrics_epoch_43.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:04:44.721997",
+  "training_start_epoch": 0,
+  "training_epochs": 43,
+  "epoch": 43,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.14076573699712752,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.696944273226657,
+  "validation_acc": 0.6077835648148148,
+  "validation_no_result": 0.12644675925925927,
+  "validation_loss": 1.0652493784825008,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_44.json b/metrics_epoch_44.json
new file mode 100644
index 0000000000000000000000000000000000000000..cf80d34cd5e1ef01e1afb047845627375f43f269
--- /dev/null
+++ b/metrics_epoch_44.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:06:08.184296",
+  "training_start_epoch": 0,
+  "training_epochs": 44,
+  "epoch": 44,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.11980391334403645,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7046144983383263,
+  "validation_acc": 0.6247106481481481,
+  "validation_no_result": 0.11791087962962964,
+  "validation_loss": 1.061642122765382,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_45.json b/metrics_epoch_45.json
new file mode 100644
index 0000000000000000000000000000000000000000..0746ba95bf2885a641de1a476721cb81045462c3
--- /dev/null
+++ b/metrics_epoch_45.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 37,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:07:42.634039",
+  "training_start_epoch": 0,
+  "training_epochs": 45,
+  "epoch": 45,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.09614421298558062,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.704835063996358,
+  "validation_acc": 0.6129918981481481,
+  "validation_no_result": 0.11530671296296297,
+  "validation_loss": 1.112284041941166,
+  "best_validation_BLEU": 0.6769016467773605,
+  "best_validation_acc": 0.6247106481481481,
+  "best_validation_no_result": 0.1087962962962963,
+  "best_validation_loss": 0.9989128684004148
+}
\ No newline at end of file
diff --git a/metrics_epoch_46.json b/metrics_epoch_46.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a76d1e5d95a9fd22b4dcd4c44e2f896231ce6c4
--- /dev/null
+++ b/metrics_epoch_46.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:09:16.409030",
+  "training_start_epoch": 0,
+  "training_epochs": 46,
+  "epoch": 46,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.08739731924777681,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6984141302081079,
+  "validation_acc": 0.6416377314814815,
+  "validation_no_result": 0.11530671296296297,
+  "validation_loss": 1.121234434346358,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_47.json b/metrics_epoch_47.json
new file mode 100644
index 0000000000000000000000000000000000000000..2260a88025581ea89d3904e4c3ae49af9d3e0e4d
--- /dev/null
+++ b/metrics_epoch_47.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:10:55.662405",
+  "training_start_epoch": 0,
+  "training_epochs": 47,
+  "epoch": 47,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.08211925696920265,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6930361013969923,
+  "validation_acc": 0.634548611111111,
+  "validation_no_result": 0.10416666666666667,
+  "validation_loss": 1.133825662235419,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_48.json b/metrics_epoch_48.json
new file mode 100644
index 0000000000000000000000000000000000000000..f12a0465ecdd1fca2c083151143d217ba78980af
--- /dev/null
+++ b/metrics_epoch_48.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:12:31.546018",
+  "training_start_epoch": 0,
+  "training_epochs": 48,
+  "epoch": 48,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.07778177600015293,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7073071684953178,
+  "validation_acc": 0.6312210648148148,
+  "validation_no_result": 0.12311921296296297,
+  "validation_loss": 1.1585969477891922,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_49.json b/metrics_epoch_49.json
new file mode 100644
index 0000000000000000000000000000000000000000..c51312ef225f1400923943162177b427a46caf55
--- /dev/null
+++ b/metrics_epoch_49.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:14:12.341546",
+  "training_start_epoch": 0,
+  "training_epochs": 49,
+  "epoch": 49,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.08109187040139329,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6964869334225392,
+  "validation_acc": 0.6325231481481481,
+  "validation_no_result": 0.1127025462962963,
+  "validation_loss": 1.1651760389407475,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_5.json b/metrics_epoch_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..823fce8e5047a971d3408509f3cce4d615501774
--- /dev/null
+++ b/metrics_epoch_5.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 5,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:08:45.223627",
+  "training_start_epoch": 0,
+  "training_epochs": 5,
+  "epoch": 5,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.8448473995382135,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.3522097889512166,
+  "validation_acc": 0.44285300925925924,
+  "validation_no_result": 0.13151041666666666,
+  "validation_loss": 0.8661380161841711,
+  "best_validation_BLEU": 0.3522097889512166,
+  "best_validation_acc": 0.44285300925925924,
+  "best_validation_no_result": 0.13151041666666666,
+  "best_validation_loss": 0.8661380161841711
+}
\ No newline at end of file
diff --git a/metrics_epoch_50.json b/metrics_epoch_50.json
new file mode 100644
index 0000000000000000000000000000000000000000..ce5ec6998643db0b8939fc7d2f4c522ebd24e44c
--- /dev/null
+++ b/metrics_epoch_50.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:15:50.787731",
+  "training_start_epoch": 0,
+  "training_epochs": 50,
+  "epoch": 50,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.07383564747869968,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7039476502305033,
+  "validation_acc": 0.6234085648148148,
+  "validation_no_result": 0.11140046296296297,
+  "validation_loss": 1.1804772640268009,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_51.json b/metrics_epoch_51.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f3576c1ec2f5683ad6f1e6749d563067ef617cf
--- /dev/null
+++ b/metrics_epoch_51.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:17:30.011877",
+  "training_start_epoch": 0,
+  "training_epochs": 51,
+  "epoch": 51,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.06791833307255399,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7000150706109356,
+  "validation_acc": 0.6338252314814815,
+  "validation_no_result": 0.11530671296296297,
+  "validation_loss": 1.1899462565779686,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_52.json b/metrics_epoch_52.json
new file mode 100644
index 0000000000000000000000000000000000000000..56765991425b447ee92b9a966b99c6b35e0fd5db
--- /dev/null
+++ b/metrics_epoch_52.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:19:04.053589",
+  "training_start_epoch": 0,
+  "training_epochs": 52,
+  "epoch": 52,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.06623337922448462,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.6926042777657853,
+  "validation_acc": 0.6064814814814815,
+  "validation_no_result": 0.1205150462962963,
+  "validation_loss": 1.2020933479070663,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_53.json b/metrics_epoch_53.json
new file mode 100644
index 0000000000000000000000000000000000000000..af5be9f641fd821e25f07c0264be6457208f8d03
--- /dev/null
+++ b/metrics_epoch_53.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:20:28.089866",
+  "training_start_epoch": 0,
+  "training_epochs": 53,
+  "epoch": 53,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.05605946074832569,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7128349028777587,
+  "validation_acc": 0.6247106481481481,
+  "validation_no_result": 0.12311921296296297,
+  "validation_loss": 1.1966175511479378,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_54.json b/metrics_epoch_54.json
new file mode 100644
index 0000000000000000000000000000000000000000..ececb369e3fa347d873ec31cc2fba793f360fc82
--- /dev/null
+++ b/metrics_epoch_54.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:22:02.835606",
+  "training_start_epoch": 0,
+  "training_epochs": 54,
+  "epoch": 54,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.0485708721998063,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7074252651303048,
+  "validation_acc": 0.6273148148148148,
+  "validation_no_result": 0.11791087962962964,
+  "validation_loss": 1.226130726436774,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_55.json b/metrics_epoch_55.json
new file mode 100644
index 0000000000000000000000000000000000000000..17d94e3b595a9060eaeb90fdac71647c4bccfdea
--- /dev/null
+++ b/metrics_epoch_55.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:23:37.218166",
+  "training_start_epoch": 0,
+  "training_epochs": 55,
+  "epoch": 55,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.04777379193427888,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7094565384091441,
+  "validation_acc": 0.6195023148148148,
+  "validation_no_result": 0.1166087962962963,
+  "validation_loss": 1.2262406672040622,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_56.json b/metrics_epoch_56.json
new file mode 100644
index 0000000000000000000000000000000000000000..803cb0ed767feea1c3fff108831d04c423f8b511
--- /dev/null
+++ b/metrics_epoch_56.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:25:10.710503",
+  "training_start_epoch": 0,
+  "training_epochs": 56,
+  "epoch": 56,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.04607348882339218,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7075218487475787,
+  "validation_acc": 0.634548611111111,
+  "validation_no_result": 0.1087962962962963,
+  "validation_loss": 1.247634395956993,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_57.json b/metrics_epoch_57.json
new file mode 100644
index 0000000000000000000000000000000000000000..4e482a19ace855a2d2a2086c808b8e41fca154b6
--- /dev/null
+++ b/metrics_epoch_57.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:26:59.135881",
+  "training_start_epoch": 0,
+  "training_epochs": 57,
+  "epoch": 57,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.043339876017787236,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7141783708143784,
+  "validation_acc": 0.6208043981481481,
+  "validation_no_result": 0.1244212962962963,
+  "validation_loss": 1.245709516108036,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_58.json b/metrics_epoch_58.json
new file mode 100644
index 0000000000000000000000000000000000000000..f55dabbf28f43c7c01c8466e80d3c9512a35f583
--- /dev/null
+++ b/metrics_epoch_58.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:28:48.198869",
+  "training_start_epoch": 0,
+  "training_epochs": 58,
+  "epoch": 58,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.04280694399706342,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7229747151247405,
+  "validation_acc": 0.6260127314814815,
+  "validation_no_result": 0.12572337962962962,
+  "validation_loss": 1.273095856110255,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_59.json b/metrics_epoch_59.json
new file mode 100644
index 0000000000000000000000000000000000000000..be8492708c5802377fcb09a0ceb0d6b25ad187e6
--- /dev/null
+++ b/metrics_epoch_59.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:30:38.972107",
+  "training_start_epoch": 0,
+  "training_epochs": 59,
+  "epoch": 59,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.038086464679376646,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.72395258707303,
+  "validation_acc": 0.6273148148148148,
+  "validation_no_result": 0.12572337962962962,
+  "validation_loss": 1.263894572854042,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_6.json b/metrics_epoch_6.json
new file mode 100644
index 0000000000000000000000000000000000000000..4cbac41dfc0f798259be16022b38433341954641
--- /dev/null
+++ b/metrics_epoch_6.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 6,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:10:08.412326",
+  "training_start_epoch": 0,
+  "training_epochs": 6,
+  "epoch": 6,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.8050483654845845,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.348359876044927,
+  "validation_acc": 0.44429976851851855,
+  "validation_no_result": 0.13020833333333334,
+  "validation_loss": 0.8393951679269472,
+  "best_validation_BLEU": 0.348359876044927,
+  "best_validation_acc": 0.44429976851851855,
+  "best_validation_no_result": 0.13020833333333334,
+  "best_validation_loss": 0.8393951679269472
+}
\ No newline at end of file
diff --git a/metrics_epoch_60.json b/metrics_epoch_60.json
new file mode 100644
index 0000000000000000000000000000000000000000..b04d106d8335521a63897c83df29afdcf24f50c5
--- /dev/null
+++ b/metrics_epoch_60.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:32:28.819014",
+  "training_start_epoch": 0,
+  "training_epochs": 60,
+  "epoch": 60,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.0358703440901908,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7278559978588297,
+  "validation_acc": 0.6351273148148148,
+  "validation_no_result": 0.12311921296296297,
+  "validation_loss": 1.268241671224435,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_61.json b/metrics_epoch_61.json
new file mode 100644
index 0000000000000000000000000000000000000000..164405d096b0436b66f4717af4673d1933523409
--- /dev/null
+++ b/metrics_epoch_61.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:34:18.148783",
+  "training_start_epoch": 0,
+  "training_epochs": 61,
+  "epoch": 61,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.03487256574021144,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7200914487496768,
+  "validation_acc": 0.6397569444444444,
+  "validation_no_result": 0.11530671296296297,
+  "validation_loss": 1.2846094022194545,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_62.json b/metrics_epoch_62.json
new file mode 100644
index 0000000000000000000000000000000000000000..29415e02672e4f716cdf1be0fc77faec5a7b1c5c
--- /dev/null
+++ b/metrics_epoch_62.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:36:06.579016",
+  "training_start_epoch": 0,
+  "training_epochs": 62,
+  "epoch": 62,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.034294218273664065,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7241165720152878,
+  "validation_acc": 0.6168981481481481,
+  "validation_no_result": 0.12181712962962964,
+  "validation_loss": 1.2769825334350269,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_63.json b/metrics_epoch_63.json
new file mode 100644
index 0000000000000000000000000000000000000000..ecb2eec8a0cda724aa28921bb74d2da5b86ebf98
--- /dev/null
+++ b/metrics_epoch_63.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 46,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:37:56.336034",
+  "training_start_epoch": 0,
+  "training_epochs": 63,
+  "epoch": 63,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.03228020341220227,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7154226129528386,
+  "validation_acc": 0.6325231481481481,
+  "validation_no_result": 0.12572337962962962,
+  "validation_loss": 1.2909877225756645,
+  "best_validation_BLEU": 0.6984141302081079,
+  "best_validation_acc": 0.6416377314814815,
+  "best_validation_no_result": 0.11530671296296297,
+  "best_validation_loss": 1.121234434346358
+}
\ No newline at end of file
diff --git a/metrics_epoch_64.json b/metrics_epoch_64.json
new file mode 100644
index 0000000000000000000000000000000000000000..6401fb264ec49527f9a0308c00afe60361310cc6
--- /dev/null
+++ b/metrics_epoch_64.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 64,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:39:46.223809",
+  "training_start_epoch": 0,
+  "training_epochs": 64,
+  "epoch": 64,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.0319976161996072,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7061484056297785,
+  "validation_acc": 0.6449652777777778,
+  "validation_no_result": 0.10416666666666667,
+  "validation_loss": 1.2931284854809444,
+  "best_validation_BLEU": 0.7061484056297785,
+  "best_validation_acc": 0.6449652777777778,
+  "best_validation_no_result": 0.10416666666666667,
+  "best_validation_loss": 1.2931284854809444
+}
\ No newline at end of file
diff --git a/metrics_epoch_65.json b/metrics_epoch_65.json
new file mode 100644
index 0000000000000000000000000000000000000000..34cba7ef05dc499e607bc72d3c0cc7bdf908e5da
--- /dev/null
+++ b/metrics_epoch_65.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:42:00.346761",
+  "training_start_epoch": 0,
+  "training_epochs": 65,
+  "epoch": 65,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.030482412044974892,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7229897065328674,
+  "validation_acc": 0.650173611111111,
+  "validation_no_result": 0.11921296296296297,
+  "validation_loss": 1.3042563870549202,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_66.json b/metrics_epoch_66.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d2432d4771ff44cb94c889055758623dffafac2
--- /dev/null
+++ b/metrics_epoch_66.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:44:12.486951",
+  "training_start_epoch": 0,
+  "training_epochs": 66,
+  "epoch": 66,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.02929827385497364,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7132230420457545,
+  "validation_acc": 0.6364293981481481,
+  "validation_no_result": 0.12181712962962964,
+  "validation_loss": 1.3180407658219337,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_67.json b/metrics_epoch_67.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f5779cf261f0b6f3caeca3f91e965d134c2660c
--- /dev/null
+++ b/metrics_epoch_67.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:46:00.645473",
+  "training_start_epoch": 0,
+  "training_epochs": 67,
+  "epoch": 67,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.029336183111776003,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7220749180453158,
+  "validation_acc": 0.6338252314814815,
+  "validation_no_result": 0.11791087962962964,
+  "validation_loss": 1.3172537262241046,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_68.json b/metrics_epoch_68.json
new file mode 100644
index 0000000000000000000000000000000000000000..7511614a7e76fcab65e95d98fb04d8194dae9e46
--- /dev/null
+++ b/metrics_epoch_68.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:48:07.763030",
+  "training_start_epoch": 0,
+  "training_epochs": 68,
+  "epoch": 68,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.028704730751500887,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7160570303153385,
+  "validation_acc": 0.630642361111111,
+  "validation_no_result": 0.12311921296296297,
+  "validation_loss": 1.3356067761778831,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_69.json b/metrics_epoch_69.json
new file mode 100644
index 0000000000000000000000000000000000000000..799dd891fbd8ddd8a6109f52f0ef227fffcf421a
--- /dev/null
+++ b/metrics_epoch_69.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:49:56.819043",
+  "training_start_epoch": 0,
+  "training_epochs": 69,
+  "epoch": 69,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.027822929832407018,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7234562018857141,
+  "validation_acc": 0.6403356481481481,
+  "validation_no_result": 0.11400462962962964,
+  "validation_loss": 1.3301840697725613,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_7.json b/metrics_epoch_7.json
new file mode 100644
index 0000000000000000000000000000000000000000..a18479c5a26b2628070d419690a615bf12bc9853
--- /dev/null
+++ b/metrics_epoch_7.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 7,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:11:31.753036",
+  "training_start_epoch": 0,
+  "training_epochs": 7,
+  "epoch": 7,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.7711158113046126,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.37835165992261416,
+  "validation_acc": 0.45847800925925924,
+  "validation_no_result": 0.13483796296296297,
+  "validation_loss": 0.8319797093669573,
+  "best_validation_BLEU": 0.37835165992261416,
+  "best_validation_acc": 0.45847800925925924,
+  "best_validation_no_result": 0.13483796296296297,
+  "best_validation_loss": 0.8319797093669573
+}
\ No newline at end of file
diff --git a/metrics_epoch_70.json b/metrics_epoch_70.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a03ecaabee25329a4676f55d8d83b3d57f719a7
--- /dev/null
+++ b/metrics_epoch_70.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:51:46.840089",
+  "training_start_epoch": 0,
+  "training_epochs": 70,
+  "epoch": 70,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.02637631700966846,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7172205941953215,
+  "validation_acc": 0.6442418981481481,
+  "validation_no_result": 0.1087962962962963,
+  "validation_loss": 1.3317091191808383,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_71.json b/metrics_epoch_71.json
new file mode 100644
index 0000000000000000000000000000000000000000..da908f82607072cd4fc0546e569f243b08016c56
--- /dev/null
+++ b/metrics_epoch_71.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:53:36.001050",
+  "training_start_epoch": 0,
+  "training_epochs": 71,
+  "epoch": 71,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.025984370251270858,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7123290193861502,
+  "validation_acc": 0.6416377314814815,
+  "validation_no_result": 0.11791087962962964,
+  "validation_loss": 1.3444155653317769,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_72.json b/metrics_epoch_72.json
new file mode 100644
index 0000000000000000000000000000000000000000..35eb7cd7b3ad992796cfb82fdfe9fce035c84891
--- /dev/null
+++ b/metrics_epoch_72.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:55:23.144361",
+  "training_start_epoch": 0,
+  "training_epochs": 72,
+  "epoch": 72,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.024248589245094494,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7243629763995799,
+  "validation_acc": 0.638454861111111,
+  "validation_no_result": 0.12109375,
+  "validation_loss": 1.353646678229173,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_73.json b/metrics_epoch_73.json
new file mode 100644
index 0000000000000000000000000000000000000000..8806ba150c7294107612dea24b3e2262b528a358
--- /dev/null
+++ b/metrics_epoch_73.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:57:17.121575",
+  "training_start_epoch": 0,
+  "training_epochs": 73,
+  "epoch": 73,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.02305317973210053,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7256423519179254,
+  "validation_acc": 0.650173611111111,
+  "validation_no_result": 0.11458333333333333,
+  "validation_loss": 1.3602836256225903,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_74.json b/metrics_epoch_74.json
new file mode 100644
index 0000000000000000000000000000000000000000..ecb77865eec387146a15e10dcf025fb508443f4b
--- /dev/null
+++ b/metrics_epoch_74.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "1:59:09.371887",
+  "training_start_epoch": 0,
+  "training_epochs": 74,
+  "epoch": 74,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.0228921974839812,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7241301306811996,
+  "validation_acc": 0.6436631944444444,
+  "validation_no_result": 0.11848958333333333,
+  "validation_loss": 1.3589155599474907,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_75.json b/metrics_epoch_75.json
new file mode 100644
index 0000000000000000000000000000000000000000..cb3ca05fefe69defb784d0ffb6690891843475ae
--- /dev/null
+++ b/metrics_epoch_75.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:00:56.945114",
+  "training_start_epoch": 0,
+  "training_epochs": 75,
+  "epoch": 75,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.02282776189121333,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7240887798169782,
+  "validation_acc": 0.6325231481481481,
+  "validation_no_result": 0.12962962962962962,
+  "validation_loss": 1.359854633609454,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_76.json b/metrics_epoch_76.json
new file mode 100644
index 0000000000000000000000000000000000000000..14178d843052480bed68a6934a528b1cd00cd4b8
--- /dev/null
+++ b/metrics_epoch_76.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:02:45.072033",
+  "training_start_epoch": 0,
+  "training_epochs": 76,
+  "epoch": 76,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.021604217978363687,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7266012365004841,
+  "validation_acc": 0.6495949074074074,
+  "validation_no_result": 0.11979166666666667,
+  "validation_loss": 1.364431341489156,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_77.json b/metrics_epoch_77.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c5e6ab887179845e3d4e5e3e6f63a609661df34
--- /dev/null
+++ b/metrics_epoch_77.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:04:34.711722",
+  "training_start_epoch": 0,
+  "training_epochs": 77,
+  "epoch": 77,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.01969964803111824,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7210583397435678,
+  "validation_acc": 0.6482928240740741,
+  "validation_no_result": 0.11979166666666667,
+  "validation_loss": 1.365205739935239,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_78.json b/metrics_epoch_78.json
new file mode 100644
index 0000000000000000000000000000000000000000..cce0dad25889c33b93e1b0dfbda553f9b23884dd
--- /dev/null
+++ b/metrics_epoch_78.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:06:23.960886",
+  "training_start_epoch": 0,
+  "training_epochs": 78,
+  "epoch": 78,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.02024272994815626,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.724350251415758,
+  "validation_acc": 0.6391782407407408,
+  "validation_no_result": 0.11979166666666667,
+  "validation_loss": 1.3815037235617638,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_79.json b/metrics_epoch_79.json
new file mode 100644
index 0000000000000000000000000000000000000000..662042e9d4f8854050f8afc5408bf615dafef32b
--- /dev/null
+++ b/metrics_epoch_79.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 65,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:08:11.677141",
+  "training_start_epoch": 0,
+  "training_epochs": 79,
+  "epoch": 79,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.02087932777252387,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7269796234555643,
+  "validation_acc": 0.6495949074074074,
+  "validation_no_result": 0.11328125,
+  "validation_loss": 1.376489485303561,
+  "best_validation_BLEU": 0.7229897065328674,
+  "best_validation_acc": 0.650173611111111,
+  "best_validation_no_result": 0.11921296296296297,
+  "best_validation_loss": 1.3042563870549202
+}
\ No newline at end of file
diff --git a/metrics_epoch_8.json b/metrics_epoch_8.json
new file mode 100644
index 0000000000000000000000000000000000000000..88c2badcfc13c6981487926d4141accfb80e927d
--- /dev/null
+++ b/metrics_epoch_8.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 8,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:12:54.325766",
+  "training_start_epoch": 0,
+  "training_epochs": 8,
+  "epoch": 8,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.7373076503927057,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.38077241418338464,
+  "validation_acc": 0.48321759259259256,
+  "validation_no_result": 0.11140046296296297,
+  "validation_loss": 0.7973246946930885,
+  "best_validation_BLEU": 0.38077241418338464,
+  "best_validation_acc": 0.48321759259259256,
+  "best_validation_no_result": 0.11140046296296297,
+  "best_validation_loss": 0.7973246946930885
+}
\ No newline at end of file
diff --git a/metrics_epoch_80.json b/metrics_epoch_80.json
new file mode 100644
index 0000000000000000000000000000000000000000..506b375aed17b1041cab761d4bb2dd4e78edd197
--- /dev/null
+++ b/metrics_epoch_80.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:10:07.761881",
+  "training_start_epoch": 0,
+  "training_epochs": 80,
+  "epoch": 80,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.019872400206937032,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7245620136488832,
+  "validation_acc": 0.6508969907407408,
+  "validation_no_result": 0.11067708333333333,
+  "validation_loss": 1.3817762682835262,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_81.json b/metrics_epoch_81.json
new file mode 100644
index 0000000000000000000000000000000000000000..882753082e288675b709270335dbda80e5e72e5b
--- /dev/null
+++ b/metrics_epoch_81.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:12:14.064481",
+  "training_start_epoch": 0,
+  "training_epochs": 81,
+  "epoch": 81,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.017521460680291055,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7238012700329899,
+  "validation_acc": 0.6443865740740741,
+  "validation_no_result": 0.11588541666666667,
+  "validation_loss": 1.3830252140760422,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_82.json b/metrics_epoch_82.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b1fcec2e3b680ec560318ec9f93bf7db3abe3b7
--- /dev/null
+++ b/metrics_epoch_82.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:14:00.992742",
+  "training_start_epoch": 0,
+  "training_epochs": 82,
+  "epoch": 82,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.018971136305481195,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7267779633348508,
+  "validation_acc": 0.6495949074074074,
+  "validation_no_result": 0.12369791666666667,
+  "validation_loss": 1.3803121149539948,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_83.json b/metrics_epoch_83.json
new file mode 100644
index 0000000000000000000000000000000000000000..822d40908d984794299c9dc836a48e43d69bee9a
--- /dev/null
+++ b/metrics_epoch_83.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:15:54.119298",
+  "training_start_epoch": 0,
+  "training_epochs": 83,
+  "epoch": 83,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.018187626988881014,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7325209776549473,
+  "validation_acc": 0.6508969907407408,
+  "validation_no_result": 0.11328125,
+  "validation_loss": 1.388483499487241,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_84.json b/metrics_epoch_84.json
new file mode 100644
index 0000000000000000000000000000000000000000..c4837fe2eba8a70af0b64acb7a4be08eb8a8b614
--- /dev/null
+++ b/metrics_epoch_84.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:17:43.139008",
+  "training_start_epoch": 0,
+  "training_epochs": 84,
+  "epoch": 84,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.018349176162684507,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7256516352666668,
+  "validation_acc": 0.6397569444444444,
+  "validation_no_result": 0.11197916666666667,
+  "validation_loss": 1.3886348779002826,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_85.json b/metrics_epoch_85.json
new file mode 100644
index 0000000000000000000000000000000000000000..76373fe4e4730b4dce8b8269e211fd26d5d4d126
--- /dev/null
+++ b/metrics_epoch_85.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:19:35.530588",
+  "training_start_epoch": 0,
+  "training_epochs": 85,
+  "epoch": 85,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.018646015739068388,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7261286762309054,
+  "validation_acc": 0.6404803240740741,
+  "validation_no_result": 0.11067708333333333,
+  "validation_loss": 1.3930509214599927,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_86.json b/metrics_epoch_86.json
new file mode 100644
index 0000000000000000000000000000000000000000..a68a812a285ffcd27cf6c69bb49dc5716486213b
--- /dev/null
+++ b/metrics_epoch_86.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:21:31.794157",
+  "training_start_epoch": 0,
+  "training_epochs": 86,
+  "epoch": 86,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.017673088898035615,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7269483961768961,
+  "validation_acc": 0.6443865740740741,
+  "validation_no_result": 0.11458333333333333,
+  "validation_loss": 1.3963286578655243,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_87.json b/metrics_epoch_87.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8a310ea0ed3ffadd2b4235c00fddbcbaf621c9a
--- /dev/null
+++ b/metrics_epoch_87.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:23:18.744182",
+  "training_start_epoch": 0,
+  "training_epochs": 87,
+  "epoch": 87,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.016832234435291454,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7265608515756667,
+  "validation_acc": 0.6469907407407408,
+  "validation_no_result": 0.11197916666666667,
+  "validation_loss": 1.397053025662899,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_88.json b/metrics_epoch_88.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a943fd8f6a1a4db9b0d7cda1aa70603927f1eb6
--- /dev/null
+++ b/metrics_epoch_88.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:25:07.031501",
+  "training_start_epoch": 0,
+  "training_epochs": 88,
+  "epoch": 88,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.016711718893863938,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.724987463449164,
+  "validation_acc": 0.6443865740740741,
+  "validation_no_result": 0.11588541666666667,
+  "validation_loss": 1.3939060419797897,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_89.json b/metrics_epoch_89.json
new file mode 100644
index 0000000000000000000000000000000000000000..165e02c8e6299c469ccc10e31eba6f102b2dd7f1
--- /dev/null
+++ b/metrics_epoch_89.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:26:55.180809",
+  "training_start_epoch": 0,
+  "training_epochs": 89,
+  "epoch": 89,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.01651661289771172,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7226806914262347,
+  "validation_acc": 0.6352719907407408,
+  "validation_no_result": 0.1171875,
+  "validation_loss": 1.4012814511855443,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_9.json b/metrics_epoch_9.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2cd9af4758482583afa788f7249c4b5836a6347
--- /dev/null
+++ b/metrics_epoch_9.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 8,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17660,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "0:14:18.475761",
+  "training_start_epoch": 0,
+  "training_epochs": 9,
+  "epoch": 9,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.7182401624592868,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17660,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.35268137885590434,
+  "validation_acc": 0.46440972222222227,
+  "validation_no_result": 0.11530671296296297,
+  "validation_loss": 0.8008881782492002,
+  "best_validation_BLEU": 0.38077241418338464,
+  "best_validation_acc": 0.48321759259259256,
+  "best_validation_no_result": 0.11140046296296297,
+  "best_validation_loss": 0.7973246946930885
+}
\ No newline at end of file
diff --git a/metrics_epoch_90.json b/metrics_epoch_90.json
new file mode 100644
index 0000000000000000000000000000000000000000..0249de0ccdb6cbbd9966a87c24090a914fe9196c
--- /dev/null
+++ b/metrics_epoch_90.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:28:42.595687",
+  "training_start_epoch": 0,
+  "training_epochs": 90,
+  "epoch": 90,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.01537049892358482,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.727213295887757,
+  "validation_acc": 0.6417824074074074,
+  "validation_no_result": 0.11848958333333333,
+  "validation_loss": 1.4017266556620598,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_91.json b/metrics_epoch_91.json
new file mode 100644
index 0000000000000000000000000000000000000000..978e10a723d10d925f4f96ea90c34c0711fea504
--- /dev/null
+++ b/metrics_epoch_91.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:30:29.772405",
+  "training_start_epoch": 0,
+  "training_epochs": 91,
+  "epoch": 91,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.01567524956031279,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7263786661247342,
+  "validation_acc": 0.6378761574074074,
+  "validation_no_result": 0.1171875,
+  "validation_loss": 1.4088865220546722,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_92.json b/metrics_epoch_92.json
new file mode 100644
index 0000000000000000000000000000000000000000..d475ce13aa32f7636125270e0ee289adca491517
--- /dev/null
+++ b/metrics_epoch_92.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:32:16.238885",
+  "training_start_epoch": 0,
+  "training_epochs": 92,
+  "epoch": 92,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.015533217218365859,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7285192584342107,
+  "validation_acc": 0.6332465277777778,
+  "validation_no_result": 0.12572337962962962,
+  "validation_loss": 1.4104147876302402,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_93.json b/metrics_epoch_93.json
new file mode 100644
index 0000000000000000000000000000000000000000..4fb60e747cbeedef5b4a10d35e1fd0046631e88a
--- /dev/null
+++ b/metrics_epoch_93.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:34:06.867258",
+  "training_start_epoch": 0,
+  "training_epochs": 93,
+  "epoch": 93,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.015072003692727197,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7285539443710713,
+  "validation_acc": 0.6365740740740741,
+  "validation_no_result": 0.12239583333333333,
+  "validation_loss": 1.4112620949745178,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_94.json b/metrics_epoch_94.json
new file mode 100644
index 0000000000000000000000000000000000000000..ce1612ab2b25d13d29209486a3b9f8c3aea434e2
--- /dev/null
+++ b/metrics_epoch_94.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:35:56.259754",
+  "training_start_epoch": 0,
+  "training_epochs": 94,
+  "epoch": 94,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.014531611329452559,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7277052682532325,
+  "validation_acc": 0.6365740740740741,
+  "validation_no_result": 0.11848958333333333,
+  "validation_loss": 1.4106633414824803,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_95.json b/metrics_epoch_95.json
new file mode 100644
index 0000000000000000000000000000000000000000..657d2d0091fbd0e0b3af0fc76b603f2a8c55fee4
--- /dev/null
+++ b/metrics_epoch_95.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:37:45.282153",
+  "training_start_epoch": 0,
+  "training_epochs": 95,
+  "epoch": 95,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.014618165079842914,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7266067850908157,
+  "validation_acc": 0.6391782407407408,
+  "validation_no_result": 0.12239583333333333,
+  "validation_loss": 1.411758229136467,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_96.json b/metrics_epoch_96.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b304064b332cac65296663e25a3d8ef27ca246e
--- /dev/null
+++ b/metrics_epoch_96.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:39:35.233646",
+  "training_start_epoch": 0,
+  "training_epochs": 96,
+  "epoch": 96,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.015245335367084905,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7297971727977184,
+  "validation_acc": 0.6404803240740741,
+  "validation_no_result": 0.1171875,
+  "validation_loss": 1.416468304892381,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_97.json b/metrics_epoch_97.json
new file mode 100644
index 0000000000000000000000000000000000000000..0938618be5b0782ee645ab1d26c3dc0dc20b2f5b
--- /dev/null
+++ b/metrics_epoch_97.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:41:29.533201",
+  "training_start_epoch": 0,
+  "training_epochs": 97,
+  "epoch": 97,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.014791254534132102,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7301994285895989,
+  "validation_acc": 0.6404803240740741,
+  "validation_no_result": 0.11588541666666667,
+  "validation_loss": 1.4220996722579002,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_98.json b/metrics_epoch_98.json
new file mode 100644
index 0000000000000000000000000000000000000000..06f768ccdaa6d5cc6e48361815b1964310c284b8
--- /dev/null
+++ b/metrics_epoch_98.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:43:13.505625",
+  "training_start_epoch": 0,
+  "training_epochs": 98,
+  "epoch": 98,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.015271843813190406,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7290488167601582,
+  "validation_acc": 0.638454861111111,
+  "validation_no_result": 0.11921296296296297,
+  "validation_loss": 1.4180005093415577,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/metrics_epoch_99.json b/metrics_epoch_99.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b30e75c4c9c194d6135ad04470ef6967d768c65
--- /dev/null
+++ b/metrics_epoch_99.json
@@ -0,0 +1,34 @@
+{
+  "best_epoch": 80,
+  "peak_cpu_memory_MB": 4772.668,
+  "peak_gpu_0_memory_MB": 5,
+  "peak_gpu_1_memory_MB": 5,
+  "peak_gpu_2_memory_MB": 5,
+  "peak_gpu_3_memory_MB": 5,
+  "peak_gpu_4_memory_MB": 17662,
+  "peak_gpu_5_memory_MB": 5,
+  "peak_gpu_6_memory_MB": 5,
+  "training_duration": "2:45:01.064397",
+  "training_start_epoch": 0,
+  "training_epochs": 99,
+  "epoch": 99,
+  "training_acc": 0,
+  "training_no_result": 0,
+  "training_loss": 0.013839077864858237,
+  "training_cpu_memory_MB": 4772.668,
+  "training_gpu_0_memory_MB": 5,
+  "training_gpu_1_memory_MB": 5,
+  "training_gpu_2_memory_MB": 5,
+  "training_gpu_3_memory_MB": 5,
+  "training_gpu_4_memory_MB": 17662,
+  "training_gpu_5_memory_MB": 5,
+  "training_gpu_6_memory_MB": 5,
+  "validation_BLEU": 0.7266542542114865,
+  "validation_acc": 0.6417824074074074,
+  "validation_no_result": 0.11848958333333333,
+  "validation_loss": 1.416664329667886,
+  "best_validation_BLEU": 0.7245620136488832,
+  "best_validation_acc": 0.6508969907407408,
+  "best_validation_no_result": 0.11067708333333333,
+  "best_validation_loss": 1.3817762682835262
+}
\ No newline at end of file
diff --git a/stderr.log b/stderr.log
new file mode 100644
index 0000000000000000000000000000000000000000..5a2036b4fac6e6a58185602a2414b55d324d6f1f
--- /dev/null
+++ b/stderr.log
@@ -0,0 +1,14089 @@
+0it [00:00, ?it/s]
+1it [00:00,  1.61it/s]
+94it [00:00, 174.95it/s]
+198it [00:00, 356.41it/s]
+275it [00:01, 354.59it/s]
+379it [00:01, 495.35it/s]
+489it [00:01, 631.21it/s]
+593it [00:01, 730.42it/s]
+690it [00:01, 791.65it/s]
+796it [00:01, 862.25it/s]
+904it [00:01, 922.64it/s]
+1005it [00:01, 944.22it/s]
+1111it [00:01, 977.44it/s]
+1214it [00:02, 718.60it/s]
+1318it [00:02, 792.41it/s]
+1425it [00:02, 859.05it/s]
+1531it [00:02, 908.72it/s]
+1633it [00:02, 935.83it/s]
+1737it [00:02, 963.82it/s]
+1840it [00:02, 981.89it/s]
+1945it [00:02, 999.55it/s]
+2047it [00:03, 695.24it/s]
+2147it [00:03, 761.38it/s]
+2248it [00:03, 819.06it/s]
+2353it [00:03, 877.41it/s]
+2457it [00:03, 920.62it/s]
+2560it [00:03, 949.07it/s]
+2667it [00:03, 982.00it/s]
+2769it [00:03, 989.11it/s]
+2874it [00:03, 1006.73it/s]
+2979it [00:03, 1018.50it/s]
+3083it [00:04, 650.72it/s] 
+3188it [00:04, 734.24it/s]
+3290it [00:04, 799.30it/s]
+3398it [00:04, 867.93it/s]
+3499it [00:04, 753.19it/s]
+
+0it [00:00, ?it/s]
+1it [00:00,  4.89it/s]
+120it [00:00, 491.20it/s]
+226it [00:00, 699.58it/s]
+329it [00:00, 811.20it/s]
+434it [00:00, 889.14it/s]
+535it [00:00, 927.37it/s]
+640it [00:00, 963.25it/s]
+745it [00:00, 820.29it/s]
+
+0it [00:00, ?it/s]
+1it [00:00,  5.21it/s]
+28it [00:00, 77.14it/s]
+130it [00:00, 345.36it/s]
+232it [00:00, 539.45it/s]
+336it [00:00, 682.78it/s]
+440it [00:00, 786.56it/s]
+542it [00:00, 855.06it/s]
+650it [00:01, 918.31it/s]
+754it [00:01, 953.67it/s]
+754it [00:01, 671.37it/s]
+
+0it [00:00, ?it/s]
+4998it [00:00, 62045.70it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 3.7836 ||:   1%|          | 1/110 [00:01<02:02,  1.13s/it]
+acc: 0.0000, no_result: 0.0000, loss: 3.6138 ||:   2%|1         | 2/110 [00:01<01:38,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 3.4255 ||:   3%|2         | 3/110 [00:02<01:28,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 3.2747 ||:   4%|3         | 4/110 [00:03<01:27,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 3.1457 ||:   5%|4         | 5/110 [00:04<01:22,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 3.0286 ||:   5%|5         | 6/110 [00:04<01:19,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.9574 ||:   6%|6         | 7/110 [00:05<01:15,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.8714 ||:   7%|7         | 8/110 [00:06<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.8230 ||:   8%|8         | 9/110 [00:07<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.7761 ||:   9%|9         | 10/110 [00:07<01:14,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.7374 ||:  10%|#         | 11/110 [00:08<01:15,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.6969 ||:  11%|#         | 12/110 [00:09<01:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.6614 ||:  12%|#1        | 13/110 [00:10<01:14,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.6413 ||:  13%|#2        | 14/110 [00:10<01:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.6257 ||:  14%|#3        | 15/110 [00:11<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.6011 ||:  15%|#4        | 16/110 [00:12<01:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.5783 ||:  15%|#5        | 17/110 [00:12<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.5543 ||:  16%|#6        | 18/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.5334 ||:  17%|#7        | 19/110 [00:14<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.5087 ||:  18%|#8        | 20/110 [00:14<01:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.4828 ||:  19%|#9        | 21/110 [00:15<01:03,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.4687 ||:  20%|##        | 22/110 [00:16<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.4518 ||:  21%|##        | 23/110 [00:17<01:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.4297 ||:  22%|##1       | 24/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.4151 ||:  23%|##2       | 25/110 [00:18<00:56,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3996 ||:  24%|##3       | 26/110 [00:18<00:54,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3818 ||:  25%|##4       | 27/110 [00:19<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3708 ||:  25%|##5       | 28/110 [00:20<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3537 ||:  26%|##6       | 29/110 [00:21<00:58,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3412 ||:  27%|##7       | 30/110 [00:21<00:58,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3308 ||:  28%|##8       | 31/110 [00:22<00:55,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3204 ||:  29%|##9       | 32/110 [00:23<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3124 ||:  30%|###       | 33/110 [00:24<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.3043 ||:  31%|###       | 34/110 [00:24<00:56,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2882 ||:  32%|###1      | 35/110 [00:25<00:53,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2734 ||:  33%|###2      | 36/110 [00:26<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2637 ||:  34%|###3      | 37/110 [00:26<00:51,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2462 ||:  35%|###4      | 38/110 [00:27<00:53,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2317 ||:  35%|###5      | 39/110 [00:28<00:57,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2253 ||:  36%|###6      | 40/110 [00:29<00:53,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2172 ||:  37%|###7      | 41/110 [00:30<00:54,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.2070 ||:  38%|###8      | 42/110 [00:30<00:52,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1945 ||:  39%|###9      | 43/110 [00:31<00:50,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1821 ||:  40%|####      | 44/110 [00:32<00:50,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1731 ||:  41%|####      | 45/110 [00:33<00:48,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1630 ||:  42%|####1     | 46/110 [00:33<00:46,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1450 ||:  43%|####2     | 47/110 [00:34<00:44,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1305 ||:  44%|####3     | 48/110 [00:35<00:44,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1184 ||:  45%|####4     | 49/110 [00:35<00:42,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1132 ||:  45%|####5     | 50/110 [00:36<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.1014 ||:  46%|####6     | 51/110 [00:37<00:42,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0900 ||:  47%|####7     | 52/110 [00:38<00:42,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0767 ||:  48%|####8     | 53/110 [00:38<00:41,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0629 ||:  49%|####9     | 54/110 [00:39<00:39,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0551 ||:  50%|#####     | 55/110 [00:40<00:38,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0457 ||:  51%|#####     | 56/110 [00:40<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0368 ||:  52%|#####1    | 57/110 [00:41<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0253 ||:  53%|#####2    | 58/110 [00:42<00:37,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0207 ||:  54%|#####3    | 59/110 [00:43<00:37,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0123 ||:  55%|#####4    | 60/110 [00:43<00:36,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 2.0000 ||:  55%|#####5    | 61/110 [00:44<00:34,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9892 ||:  56%|#####6    | 62/110 [00:45<00:35,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9799 ||:  57%|#####7    | 63/110 [00:45<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9704 ||:  58%|#####8    | 64/110 [00:46<00:34,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9608 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9515 ||:  60%|######    | 66/110 [00:48<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9462 ||:  61%|######    | 67/110 [00:48<00:32,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9360 ||:  62%|######1   | 68/110 [00:49<00:31,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9301 ||:  63%|######2   | 69/110 [00:50<00:29,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9193 ||:  64%|######3   | 70/110 [00:51<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9110 ||:  65%|######4   | 71/110 [00:51<00:27,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.9027 ||:  65%|######5   | 72/110 [00:52<00:27,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8942 ||:  66%|######6   | 73/110 [00:53<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8899 ||:  67%|######7   | 74/110 [00:53<00:24,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8815 ||:  68%|######8   | 75/110 [00:54<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8748 ||:  69%|######9   | 76/110 [00:55<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8693 ||:  70%|#######   | 77/110 [00:56<00:24,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8610 ||:  71%|#######   | 78/110 [00:56<00:23,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8537 ||:  72%|#######1  | 79/110 [00:57<00:23,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8520 ||:  73%|#######2  | 80/110 [00:58<00:22,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8453 ||:  74%|#######3  | 81/110 [00:58<00:20,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8390 ||:  75%|#######4  | 82/110 [00:59<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8312 ||:  75%|#######5  | 83/110 [01:00<00:18,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8268 ||:  76%|#######6  | 84/110 [01:00<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8209 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8129 ||:  78%|#######8  | 86/110 [01:02<00:16,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8044 ||:  79%|#######9  | 87/110 [01:03<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.8010 ||:  80%|########  | 88/110 [01:03<00:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7949 ||:  81%|########  | 89/110 [01:04<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7888 ||:  82%|########1 | 90/110 [01:05<00:15,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7814 ||:  83%|########2 | 91/110 [01:06<00:14,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7758 ||:  84%|########3 | 92/110 [01:06<00:13,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7696 ||:  85%|########4 | 93/110 [01:07<00:12,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7654 ||:  85%|########5 | 94/110 [01:08<00:11,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7606 ||:  86%|########6 | 95/110 [01:08<00:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7562 ||:  87%|########7 | 96/110 [01:09<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7520 ||:  88%|########8 | 97/110 [01:10<00:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7468 ||:  89%|########9 | 98/110 [01:11<00:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7414 ||:  90%|######### | 99/110 [01:11<00:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7354 ||:  91%|######### | 100/110 [01:13<00:09,  1.01it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7326 ||:  92%|#########1| 101/110 [01:14<00:08,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7268 ||:  93%|#########2| 102/110 [01:14<00:06,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7217 ||:  94%|#########3| 103/110 [01:15<00:05,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7166 ||:  95%|#########4| 104/110 [01:16<00:04,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7115 ||:  95%|#########5| 105/110 [01:16<00:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7079 ||:  96%|#########6| 106/110 [01:17<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.7040 ||:  97%|#########7| 107/110 [01:18<00:02,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.6999 ||:  98%|#########8| 108/110 [01:19<00:01,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.6945 ||:  99%|#########9| 109/110 [01:20<00:00,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.6869 ||: 100%|##########| 110/110 [01:20<00:00,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.6869 ||: 100%|##########| 110/110 [01:20<00:00,  1.37it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.2770, acc: 0.4062, no_result: 0.0312, loss: 1.0724 ||:   4%|4         | 1/24 [00:00<00:07,  2.92it/s]
+BLEU: 0.2347, acc: 0.4375, no_result: 0.0312, loss: 0.9925 ||:   8%|8         | 2/24 [00:00<00:07,  3.02it/s]
+BLEU: 0.2224, acc: 0.4479, no_result: 0.0521, loss: 1.0187 ||:  12%|#2        | 3/24 [00:00<00:06,  3.13it/s]
+BLEU: 0.2386, acc: 0.3516, no_result: 0.2109, loss: 1.1801 ||:  17%|#6        | 4/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.2632, acc: 0.3875, no_result: 0.2188, loss: 1.1672 ||:  21%|##        | 5/24 [00:01<00:06,  2.80it/s]
+BLEU: 0.2644, acc: 0.3750, no_result: 0.2344, loss: 1.1964 ||:  25%|##5       | 6/24 [00:02<00:06,  2.89it/s]
+BLEU: 0.2553, acc: 0.3929, no_result: 0.2232, loss: 1.1713 ||:  29%|##9       | 7/24 [00:02<00:05,  3.03it/s]
+BLEU: 0.2529, acc: 0.3984, no_result: 0.2148, loss: 1.1611 ||:  33%|###3      | 8/24 [00:02<00:05,  3.07it/s]
+BLEU: 0.2373, acc: 0.3750, no_result: 0.2014, loss: 1.1531 ||:  38%|###7      | 9/24 [00:03<00:04,  3.06it/s]
+BLEU: 0.2386, acc: 0.3563, no_result: 0.2000, loss: 1.1747 ||:  42%|####1     | 10/24 [00:03<00:04,  2.99it/s]
+BLEU: 0.2441, acc: 0.3523, no_result: 0.1989, loss: 1.1941 ||:  46%|####5     | 11/24 [00:03<00:04,  2.86it/s]
+BLEU: 0.2468, acc: 0.3776, no_result: 0.1901, loss: 1.1750 ||:  50%|#####     | 12/24 [00:04<00:04,  2.97it/s]
+BLEU: 0.2398, acc: 0.3846, no_result: 0.1899, loss: 1.1922 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.88it/s]
+BLEU: 0.2470, acc: 0.3884, no_result: 0.1897, loss: 1.1910 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.80it/s]
+BLEU: 0.2368, acc: 0.3708, no_result: 0.2208, loss: 1.2265 ||:  62%|######2   | 15/24 [00:05<00:03,  2.60it/s]
+BLEU: 0.2315, acc: 0.3594, no_result: 0.2188, loss: 1.2459 ||:  67%|######6   | 16/24 [00:05<00:03,  2.55it/s]
+BLEU: 0.2304, acc: 0.3585, no_result: 0.2298, loss: 1.2539 ||:  71%|#######   | 17/24 [00:06<00:02,  2.59it/s]
+BLEU: 0.2293, acc: 0.3576, no_result: 0.2326, loss: 1.2478 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.59it/s]
+BLEU: 0.2323, acc: 0.3618, no_result: 0.2220, loss: 1.2289 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.67it/s]
+BLEU: 0.2328, acc: 0.3625, no_result: 0.2125, loss: 1.2160 ||:  83%|########3 | 20/24 [00:07<00:01,  2.83it/s]
+BLEU: 0.2333, acc: 0.3646, no_result: 0.2039, loss: 1.2078 ||:  88%|########7 | 21/24 [00:07<00:01,  2.84it/s]
+BLEU: 0.2324, acc: 0.3594, no_result: 0.2088, loss: 1.2081 ||:  92%|#########1| 22/24 [00:07<00:00,  2.78it/s]
+BLEU: 0.2327, acc: 0.3682, no_result: 0.2052, loss: 1.1980 ||:  96%|#########5| 23/24 [00:08<00:00,  2.87it/s]
+BLEU: 0.2332, acc: 0.3668, no_result: 0.1966, loss: 1.2028 ||: 100%|##########| 24/24 [00:08<00:00,  3.57it/s]
+BLEU: 0.2332, acc: 0.3668, no_result: 0.1966, loss: 1.2028 ||: 100%|##########| 24/24 [00:08<00:00,  2.91it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2590 ||:   1%|          | 1/110 [00:00<01:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2527 ||:   2%|1         | 2/110 [00:01<01:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2628 ||:   3%|2         | 3/110 [00:02<01:18,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2634 ||:   4%|3         | 4/110 [00:02<01:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2747 ||:   5%|4         | 5/110 [00:03<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2338 ||:   5%|5         | 6/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2443 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2545 ||:   7%|7         | 8/110 [00:05<01:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2352 ||:   8%|8         | 9/110 [00:06<01:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2183 ||:   9%|9         | 10/110 [00:07<01:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2166 ||:  10%|#         | 11/110 [00:07<01:14,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2270 ||:  11%|#         | 12/110 [00:08<01:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2287 ||:  12%|#1        | 13/110 [00:09<01:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2213 ||:  13%|#2        | 14/110 [00:10<01:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2172 ||:  14%|#3        | 15/110 [00:10<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2079 ||:  15%|#4        | 16/110 [00:11<01:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.2007 ||:  15%|#5        | 17/110 [00:12<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1942 ||:  16%|#6        | 18/110 [00:12<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1921 ||:  17%|#7        | 19/110 [00:13<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1895 ||:  18%|#8        | 20/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1873 ||:  19%|#9        | 21/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1852 ||:  20%|##        | 22/110 [00:15<00:57,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1798 ||:  21%|##        | 23/110 [00:15<00:54,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1791 ||:  22%|##1       | 24/110 [00:16<00:55,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1761 ||:  23%|##2       | 25/110 [00:17<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1749 ||:  24%|##3       | 26/110 [00:18<00:59,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1788 ||:  25%|##4       | 27/110 [00:19<01:04,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1819 ||:  25%|##5       | 28/110 [00:19<00:59,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1817 ||:  26%|##6       | 29/110 [00:20<00:56,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1731 ||:  27%|##7       | 30/110 [00:21<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1717 ||:  28%|##8       | 31/110 [00:21<00:54,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1718 ||:  29%|##9       | 32/110 [00:22<00:54,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1682 ||:  30%|###       | 33/110 [00:23<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1642 ||:  31%|###       | 34/110 [00:23<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1641 ||:  32%|###1      | 35/110 [00:24<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1620 ||:  33%|###2      | 36/110 [00:25<00:50,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1620 ||:  34%|###3      | 37/110 [00:25<00:49,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1631 ||:  35%|###4      | 38/110 [00:26<00:53,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1645 ||:  35%|###5      | 39/110 [00:27<00:51,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1658 ||:  36%|###6      | 40/110 [00:28<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1656 ||:  37%|###7      | 41/110 [00:28<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1635 ||:  38%|###8      | 42/110 [00:29<00:48,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1656 ||:  39%|###9      | 43/110 [00:30<00:46,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1661 ||:  40%|####      | 44/110 [00:30<00:48,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1666 ||:  41%|####      | 45/110 [00:31<00:46,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1662 ||:  42%|####1     | 46/110 [00:32<00:44,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1671 ||:  43%|####2     | 47/110 [00:32<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1675 ||:  44%|####3     | 48/110 [00:33<00:43,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1643 ||:  45%|####4     | 49/110 [00:34<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1623 ||:  45%|####5     | 50/110 [00:35<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1649 ||:  46%|####6     | 51/110 [00:35<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1617 ||:  47%|####7     | 52/110 [00:36<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1604 ||:  48%|####8     | 53/110 [00:37<00:37,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1574 ||:  49%|####9     | 54/110 [00:37<00:36,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1576 ||:  50%|#####     | 55/110 [00:38<00:35,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1581 ||:  51%|#####     | 56/110 [00:38<00:33,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1558 ||:  52%|#####1    | 57/110 [00:39<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1564 ||:  53%|#####2    | 58/110 [00:40<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1527 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1526 ||:  55%|#####4    | 60/110 [00:41<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1530 ||:  55%|#####5    | 61/110 [00:42<00:36,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1500 ||:  56%|#####6    | 62/110 [00:43<00:34,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1487 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1478 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1459 ||:  59%|#####9    | 65/110 [00:45<00:30,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1461 ||:  60%|######    | 66/110 [00:45<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1460 ||:  61%|######    | 67/110 [00:46<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1467 ||:  62%|######1   | 68/110 [00:47<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1467 ||:  63%|######2   | 69/110 [00:48<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1454 ||:  64%|######3   | 70/110 [00:48<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1443 ||:  65%|######4   | 71/110 [00:49<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1413 ||:  65%|######5   | 72/110 [00:50<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1421 ||:  66%|######6   | 73/110 [00:50<00:26,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1417 ||:  67%|######7   | 74/110 [00:51<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1404 ||:  68%|######8   | 75/110 [00:52<00:25,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1415 ||:  69%|######9   | 76/110 [00:53<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1423 ||:  70%|#######   | 77/110 [00:53<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1400 ||:  71%|#######   | 78/110 [00:54<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1403 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1401 ||:  73%|#######2  | 80/110 [00:55<00:20,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1373 ||:  74%|#######3  | 81/110 [00:56<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1363 ||:  75%|#######4  | 82/110 [00:57<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1352 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1338 ||:  76%|#######6  | 84/110 [00:58<00:17,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1364 ||:  77%|#######7  | 85/110 [00:59<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1363 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1374 ||:  79%|#######9  | 87/110 [01:00<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1386 ||:  80%|########  | 88/110 [01:01<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1387 ||:  81%|########  | 89/110 [01:01<00:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1375 ||:  82%|########1 | 90/110 [01:03<00:19,  1.00it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1366 ||:  83%|########2 | 91/110 [01:04<00:17,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1365 ||:  84%|########3 | 92/110 [01:04<00:14,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1343 ||:  85%|########4 | 93/110 [01:05<00:12,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1335 ||:  85%|########5 | 94/110 [01:06<00:12,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1339 ||:  86%|########6 | 95/110 [01:06<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1331 ||:  87%|########7 | 96/110 [01:07<00:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1329 ||:  88%|########8 | 97/110 [01:08<00:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1343 ||:  89%|########9 | 98/110 [01:09<00:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1328 ||:  90%|######### | 99/110 [01:09<00:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1310 ||:  91%|######### | 100/110 [01:10<00:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1302 ||:  92%|#########1| 101/110 [01:11<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1295 ||:  93%|#########2| 102/110 [01:11<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1289 ||:  94%|#########3| 103/110 [01:12<00:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1271 ||:  95%|#########4| 104/110 [01:13<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1260 ||:  95%|#########5| 105/110 [01:13<00:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1244 ||:  96%|#########6| 106/110 [01:14<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1230 ||:  97%|#########7| 107/110 [01:15<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1219 ||:  98%|#########8| 108/110 [01:15<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1214 ||:  99%|#########9| 109/110 [01:16<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1222 ||: 100%|##########| 110/110 [01:16<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.1222 ||: 100%|##########| 110/110 [01:16<00:00,  1.43it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3719, acc: 0.5000, no_result: 0.0625, loss: 0.9286 ||:   4%|4         | 1/24 [00:00<00:07,  3.00it/s]
+BLEU: 0.3900, acc: 0.5156, no_result: 0.0938, loss: 0.8600 ||:   8%|8         | 2/24 [00:00<00:07,  3.01it/s]
+BLEU: 0.3718, acc: 0.5208, no_result: 0.1042, loss: 0.8922 ||:  12%|#2        | 3/24 [00:00<00:06,  3.07it/s]
+BLEU: 0.3637, acc: 0.4844, no_result: 0.1562, loss: 1.0132 ||:  17%|#6        | 4/24 [00:01<00:06,  2.88it/s]
+BLEU: 0.4099, acc: 0.5125, no_result: 0.1625, loss: 0.9801 ||:  21%|##        | 5/24 [00:01<00:06,  2.85it/s]
+BLEU: 0.4119, acc: 0.4896, no_result: 0.1667, loss: 1.0062 ||:  25%|##5       | 6/24 [00:02<00:06,  2.96it/s]
+BLEU: 0.4039, acc: 0.4911, no_result: 0.1562, loss: 0.9934 ||:  29%|##9       | 7/24 [00:02<00:05,  3.09it/s]
+BLEU: 0.3929, acc: 0.4961, no_result: 0.1602, loss: 0.9894 ||:  33%|###3      | 8/24 [00:02<00:05,  3.10it/s]
+BLEU: 0.3862, acc: 0.4618, no_result: 0.1597, loss: 0.9879 ||:  38%|###7      | 9/24 [00:02<00:04,  3.08it/s]
+BLEU: 0.3849, acc: 0.4406, no_result: 0.1594, loss: 1.0017 ||:  42%|####1     | 10/24 [00:03<00:04,  3.02it/s]
+BLEU: 0.3868, acc: 0.4290, no_result: 0.1619, loss: 1.0117 ||:  46%|####5     | 11/24 [00:03<00:04,  2.91it/s]
+BLEU: 0.3943, acc: 0.4557, no_result: 0.1510, loss: 0.9998 ||:  50%|#####     | 12/24 [00:03<00:03,  3.02it/s]
+BLEU: 0.3809, acc: 0.4543, no_result: 0.1538, loss: 1.0156 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.91it/s]
+BLEU: 0.3830, acc: 0.4554, no_result: 0.1496, loss: 1.0195 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.84it/s]
+BLEU: 0.3636, acc: 0.4458, no_result: 0.1729, loss: 1.0533 ||:  62%|######2   | 15/24 [00:05<00:03,  2.65it/s]
+BLEU: 0.3529, acc: 0.4297, no_result: 0.1777, loss: 1.0670 ||:  67%|######6   | 16/24 [00:05<00:03,  2.59it/s]
+BLEU: 0.3483, acc: 0.4228, no_result: 0.1985, loss: 1.0759 ||:  71%|#######   | 17/24 [00:05<00:02,  2.61it/s]
+BLEU: 0.3435, acc: 0.4201, no_result: 0.2014, loss: 1.0739 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.57it/s]
+BLEU: 0.3465, acc: 0.4243, no_result: 0.1957, loss: 1.0604 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.64it/s]
+BLEU: 0.3504, acc: 0.4281, no_result: 0.1875, loss: 1.0523 ||:  83%|########3 | 20/24 [00:07<00:01,  2.76it/s]
+BLEU: 0.3517, acc: 0.4271, no_result: 0.1786, loss: 1.0445 ||:  88%|########7 | 21/24 [00:07<00:01,  2.78it/s]
+BLEU: 0.3512, acc: 0.4205, no_result: 0.1761, loss: 1.0438 ||:  92%|#########1| 22/24 [00:07<00:00,  2.75it/s]
+BLEU: 0.3539, acc: 0.4239, no_result: 0.1739, loss: 1.0360 ||:  96%|#########5| 23/24 [00:08<00:00,  2.81it/s]
+BLEU: 0.3529, acc: 0.4248, no_result: 0.1667, loss: 1.0453 ||: 100%|##########| 24/24 [00:08<00:00,  3.46it/s]
+BLEU: 0.3529, acc: 0.4248, no_result: 0.1667, loss: 1.0453 ||: 100%|##########| 24/24 [00:08<00:00,  2.91it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0546 ||:   1%|          | 1/110 [00:00<01:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0347 ||:   2%|1         | 2/110 [00:01<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0764 ||:   3%|2         | 3/110 [00:02<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0483 ||:   4%|3         | 4/110 [00:02<01:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0133 ||:   5%|4         | 5/110 [00:03<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9982 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0019 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0229 ||:   7%|7         | 8/110 [00:05<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0119 ||:   8%|8         | 9/110 [00:06<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0163 ||:   9%|9         | 10/110 [00:07<01:12,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0174 ||:  10%|#         | 11/110 [00:07<01:14,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0211 ||:  11%|#         | 12/110 [00:08<01:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0197 ||:  12%|#1        | 13/110 [00:09<01:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0177 ||:  13%|#2        | 14/110 [00:09<01:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0149 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0246 ||:  15%|#4        | 16/110 [00:11<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0248 ||:  15%|#5        | 17/110 [00:12<01:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0136 ||:  16%|#6        | 18/110 [00:12<01:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0122 ||:  17%|#7        | 19/110 [00:13<01:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0124 ||:  18%|#8        | 20/110 [00:14<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0089 ||:  19%|#9        | 21/110 [00:14<01:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0092 ||:  20%|##        | 22/110 [00:15<01:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0105 ||:  21%|##        | 23/110 [00:16<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0125 ||:  22%|##1       | 24/110 [00:17<01:08,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0146 ||:  23%|##2       | 25/110 [00:18<01:08,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0195 ||:  24%|##3       | 26/110 [00:18<01:05,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0176 ||:  25%|##4       | 27/110 [00:19<01:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0170 ||:  25%|##5       | 28/110 [00:20<00:59,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0229 ||:  26%|##6       | 29/110 [00:21<01:04,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0237 ||:  27%|##7       | 30/110 [00:21<01:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0263 ||:  28%|##8       | 31/110 [00:22<01:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0237 ||:  29%|##9       | 32/110 [00:23<00:59,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0247 ||:  30%|###       | 33/110 [00:24<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0248 ||:  31%|###       | 34/110 [00:24<00:52,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0228 ||:  32%|###1      | 35/110 [00:25<00:53,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0203 ||:  33%|###2      | 36/110 [00:26<00:53,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0160 ||:  34%|###3      | 37/110 [00:26<00:51,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0163 ||:  35%|###4      | 38/110 [00:27<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0162 ||:  35%|###5      | 39/110 [00:28<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0155 ||:  36%|###6      | 40/110 [00:28<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0123 ||:  37%|###7      | 41/110 [00:29<00:45,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0093 ||:  38%|###8      | 42/110 [00:30<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0113 ||:  39%|###9      | 43/110 [00:30<00:48,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0125 ||:  40%|####      | 44/110 [00:31<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0103 ||:  41%|####      | 45/110 [00:32<00:46,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0130 ||:  42%|####1     | 46/110 [00:33<00:45,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0110 ||:  43%|####2     | 47/110 [00:33<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0079 ||:  44%|####3     | 48/110 [00:34<00:44,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0094 ||:  45%|####4     | 49/110 [00:35<00:43,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0115 ||:  45%|####5     | 50/110 [00:36<00:45,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0115 ||:  46%|####6     | 51/110 [00:36<00:44,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0125 ||:  47%|####7     | 52/110 [00:37<00:42,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0112 ||:  48%|####8     | 53/110 [00:38<00:40,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0115 ||:  49%|####9     | 54/110 [00:38<00:41,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0114 ||:  50%|#####     | 55/110 [00:39<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0091 ||:  51%|#####     | 56/110 [00:40<00:39,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0101 ||:  52%|#####1    | 57/110 [00:41<00:37,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0117 ||:  53%|#####2    | 58/110 [00:41<00:38,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0118 ||:  54%|#####3    | 59/110 [00:42<00:37,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0116 ||:  55%|#####4    | 60/110 [00:43<00:35,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0113 ||:  55%|#####5    | 61/110 [00:43<00:33,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0118 ||:  56%|#####6    | 62/110 [00:44<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0130 ||:  57%|#####7    | 63/110 [00:45<00:33,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0117 ||:  58%|#####8    | 64/110 [00:46<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0105 ||:  59%|#####9    | 65/110 [00:46<00:31,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0107 ||:  60%|######    | 66/110 [00:47<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0145 ||:  61%|######    | 67/110 [00:48<00:31,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0130 ||:  62%|######1   | 68/110 [00:48<00:30,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0116 ||:  63%|######2   | 69/110 [00:49<00:29,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0098 ||:  64%|######3   | 70/110 [00:50<00:29,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0106 ||:  65%|######4   | 71/110 [00:51<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0098 ||:  65%|######5   | 72/110 [00:51<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0102 ||:  66%|######6   | 73/110 [00:52<00:25,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0099 ||:  67%|######7   | 74/110 [00:53<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0092 ||:  68%|######8   | 75/110 [00:53<00:25,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0069 ||:  69%|######9   | 76/110 [00:54<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0081 ||:  70%|#######   | 77/110 [00:55<00:23,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0103 ||:  71%|#######   | 78/110 [00:56<00:22,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0075 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0084 ||:  73%|#######2  | 80/110 [00:58<00:28,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0083 ||:  74%|#######3  | 81/110 [00:58<00:25,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0074 ||:  75%|#######4  | 82/110 [00:59<00:22,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0073 ||:  75%|#######5  | 83/110 [01:00<00:20,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0072 ||:  76%|#######6  | 84/110 [01:01<00:20,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0073 ||:  77%|#######7  | 85/110 [01:01<00:18,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0061 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0054 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0052 ||:  80%|########  | 88/110 [01:03<00:16,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0057 ||:  81%|########  | 89/110 [01:04<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0056 ||:  82%|########1 | 90/110 [01:05<00:15,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0051 ||:  83%|########2 | 91/110 [01:06<00:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0056 ||:  84%|########3 | 92/110 [01:06<00:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0052 ||:  85%|########4 | 93/110 [01:07<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0054 ||:  85%|########5 | 94/110 [01:08<00:11,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0066 ||:  86%|########6 | 95/110 [01:09<00:11,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0058 ||:  87%|########7 | 96/110 [01:09<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0076 ||:  88%|########8 | 97/110 [01:10<00:09,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0065 ||:  89%|########9 | 98/110 [01:11<00:09,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0083 ||:  90%|######### | 99/110 [01:12<00:08,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0081 ||:  91%|######### | 100/110 [01:12<00:07,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0076 ||:  92%|#########1| 101/110 [01:13<00:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0072 ||:  93%|#########2| 102/110 [01:14<00:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0060 ||:  94%|#########3| 103/110 [01:14<00:05,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0062 ||:  95%|#########4| 104/110 [01:15<00:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0049 ||:  95%|#########5| 105/110 [01:16<00:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0043 ||:  96%|#########6| 106/110 [01:16<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0034 ||:  97%|#########7| 107/110 [01:17<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0035 ||:  98%|#########8| 108/110 [01:18<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0042 ||:  99%|#########9| 109/110 [01:19<00:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0031 ||: 100%|##########| 110/110 [01:19<00:00,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 1.0031 ||: 100%|##########| 110/110 [01:19<00:00,  1.39it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.2605, acc: 0.5000, no_result: 0.0312, loss: 0.8856 ||:   4%|4         | 1/24 [00:00<00:08,  2.82it/s]
+BLEU: 0.2355, acc: 0.5156, no_result: 0.0781, loss: 0.8133 ||:   8%|8         | 2/24 [00:00<00:07,  2.86it/s]
+BLEU: 0.2241, acc: 0.5104, no_result: 0.1042, loss: 0.8651 ||:  12%|#2        | 3/24 [00:01<00:07,  2.92it/s]
+BLEU: 0.2586, acc: 0.4688, no_result: 0.1641, loss: 0.9590 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.3145, acc: 0.4938, no_result: 0.1625, loss: 0.9247 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.3242, acc: 0.4792, no_result: 0.1562, loss: 0.9467 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.3052, acc: 0.4821, no_result: 0.1518, loss: 0.9381 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.2960, acc: 0.4883, no_result: 0.1484, loss: 0.9374 ||:  33%|###3      | 8/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.2957, acc: 0.4618, no_result: 0.1458, loss: 0.9376 ||:  38%|###7      | 9/24 [00:03<00:05,  2.91it/s]
+BLEU: 0.3038, acc: 0.4375, no_result: 0.1531, loss: 0.9491 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.3132, acc: 0.4233, no_result: 0.1591, loss: 0.9528 ||:  46%|####5     | 11/24 [00:03<00:04,  2.74it/s]
+BLEU: 0.3129, acc: 0.4531, no_result: 0.1484, loss: 0.9430 ||:  50%|#####     | 12/24 [00:04<00:04,  2.83it/s]
+BLEU: 0.3034, acc: 0.4519, no_result: 0.1562, loss: 0.9594 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.3088, acc: 0.4509, no_result: 0.1518, loss: 0.9645 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.65it/s]
+BLEU: 0.2952, acc: 0.4354, no_result: 0.1812, loss: 0.9968 ||:  62%|######2   | 15/24 [00:05<00:03,  2.49it/s]
+BLEU: 0.2881, acc: 0.4238, no_result: 0.1816, loss: 1.0101 ||:  67%|######6   | 16/24 [00:05<00:03,  2.47it/s]
+BLEU: 0.2864, acc: 0.4136, no_result: 0.2059, loss: 1.0206 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.2811, acc: 0.4149, no_result: 0.2083, loss: 1.0222 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.2826, acc: 0.4161, no_result: 0.2039, loss: 1.0110 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.55it/s]
+BLEU: 0.2827, acc: 0.4188, no_result: 0.1953, loss: 1.0060 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.2801, acc: 0.4152, no_result: 0.1875, loss: 1.0000 ||:  88%|########7 | 21/24 [00:07<00:01,  2.70it/s]
+BLEU: 0.2812, acc: 0.4119, no_result: 0.1861, loss: 0.9996 ||:  92%|#########1| 22/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.2811, acc: 0.4212, no_result: 0.1821, loss: 0.9921 ||:  96%|#########5| 23/24 [00:08<00:00,  2.78it/s]
+BLEU: 0.2808, acc: 0.4175, no_result: 0.1745, loss: 0.9995 ||: 100%|##########| 24/24 [00:08<00:00,  3.49it/s]
+BLEU: 0.2808, acc: 0.4175, no_result: 0.1745, loss: 0.9995 ||: 100%|##########| 24/24 [00:08<00:00,  2.79it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9430 ||:   1%|          | 1/110 [00:00<01:28,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9345 ||:   2%|1         | 2/110 [00:01<01:20,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9410 ||:   3%|2         | 3/110 [00:02<01:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9505 ||:   4%|3         | 4/110 [00:02<01:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9454 ||:   5%|4         | 5/110 [00:03<01:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9348 ||:   5%|5         | 6/110 [00:04<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9301 ||:   6%|6         | 7/110 [00:04<01:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9492 ||:   7%|7         | 8/110 [00:05<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9502 ||:   8%|8         | 9/110 [00:06<01:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9474 ||:   9%|9         | 10/110 [00:07<01:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9471 ||:  10%|#         | 11/110 [00:07<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9666 ||:  11%|#         | 12/110 [00:08<01:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9606 ||:  12%|#1        | 13/110 [00:09<01:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9656 ||:  13%|#2        | 14/110 [00:10<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9644 ||:  14%|#3        | 15/110 [00:10<01:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9560 ||:  15%|#4        | 16/110 [00:11<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9585 ||:  15%|#5        | 17/110 [00:12<01:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9583 ||:  16%|#6        | 18/110 [00:12<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9585 ||:  17%|#7        | 19/110 [00:13<01:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9584 ||:  18%|#8        | 20/110 [00:13<00:58,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9624 ||:  19%|#9        | 21/110 [00:14<00:57,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9668 ||:  20%|##        | 22/110 [00:15<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9677 ||:  21%|##        | 23/110 [00:16<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9637 ||:  22%|##1       | 24/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9632 ||:  23%|##2       | 25/110 [00:17<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9576 ||:  24%|##3       | 26/110 [00:17<00:53,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9556 ||:  25%|##4       | 27/110 [00:18<00:52,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9523 ||:  25%|##5       | 28/110 [00:19<00:53,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9541 ||:  26%|##6       | 29/110 [00:19<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9560 ||:  27%|##7       | 30/110 [00:20<00:53,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9601 ||:  28%|##8       | 31/110 [00:21<00:52,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9554 ||:  29%|##9       | 32/110 [00:21<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9561 ||:  30%|###       | 33/110 [00:22<00:54,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9546 ||:  31%|###       | 34/110 [00:23<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9545 ||:  32%|###1      | 35/110 [00:24<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9556 ||:  33%|###2      | 36/110 [00:24<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9601 ||:  34%|###3      | 37/110 [00:25<00:53,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9584 ||:  35%|###4      | 38/110 [00:26<00:53,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9573 ||:  35%|###5      | 39/110 [00:27<00:52,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9553 ||:  36%|###6      | 40/110 [00:27<00:50,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9564 ||:  37%|###7      | 41/110 [00:28<00:47,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9554 ||:  38%|###8      | 42/110 [00:29<00:48,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9538 ||:  39%|###9      | 43/110 [00:29<00:46,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9565 ||:  40%|####      | 44/110 [00:30<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9565 ||:  41%|####      | 45/110 [00:31<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9571 ||:  42%|####1     | 46/110 [00:31<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9578 ||:  43%|####2     | 47/110 [00:32<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9574 ||:  44%|####3     | 48/110 [00:33<00:42,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9570 ||:  45%|####4     | 49/110 [00:33<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9546 ||:  45%|####5     | 50/110 [00:34<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9537 ||:  46%|####6     | 51/110 [00:35<00:40,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9540 ||:  47%|####7     | 52/110 [00:35<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9530 ||:  48%|####8     | 53/110 [00:36<00:38,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9499 ||:  49%|####9     | 54/110 [00:37<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9500 ||:  50%|#####     | 55/110 [00:38<00:40,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9495 ||:  51%|#####     | 56/110 [00:38<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9497 ||:  52%|#####1    | 57/110 [00:39<00:37,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9488 ||:  53%|#####2    | 58/110 [00:40<00:35,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9488 ||:  54%|#####3    | 59/110 [00:40<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9482 ||:  55%|#####4    | 60/110 [00:41<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9483 ||:  55%|#####5    | 61/110 [00:42<00:34,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9484 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9491 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9488 ||:  58%|#####8    | 64/110 [00:44<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9502 ||:  59%|#####9    | 65/110 [00:45<00:33,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9499 ||:  60%|######    | 66/110 [00:45<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9483 ||:  61%|######    | 67/110 [00:46<00:30,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9485 ||:  62%|######1   | 68/110 [00:47<00:28,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9481 ||:  63%|######2   | 69/110 [00:47<00:27,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9485 ||:  64%|######3   | 70/110 [00:49<00:37,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9492 ||:  65%|######4   | 71/110 [00:50<00:33,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9481 ||:  65%|######5   | 72/110 [00:50<00:31,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9494 ||:  66%|######6   | 73/110 [00:51<00:30,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9481 ||:  67%|######7   | 74/110 [00:52<00:28,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9488 ||:  68%|######8   | 75/110 [00:53<00:26,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9479 ||:  69%|######9   | 76/110 [00:53<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9490 ||:  70%|#######   | 77/110 [00:54<00:23,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9484 ||:  71%|#######   | 78/110 [00:55<00:23,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9481 ||:  72%|#######1  | 79/110 [00:55<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9473 ||:  73%|#######2  | 80/110 [00:56<00:20,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9455 ||:  74%|#######3  | 81/110 [00:57<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9460 ||:  75%|#######4  | 82/110 [00:57<00:20,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9464 ||:  75%|#######5  | 83/110 [00:58<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9460 ||:  76%|#######6  | 84/110 [00:59<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9462 ||:  77%|#######7  | 85/110 [01:00<00:17,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9455 ||:  78%|#######8  | 86/110 [01:00<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9455 ||:  79%|#######9  | 87/110 [01:01<00:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9450 ||:  80%|########  | 88/110 [01:02<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9444 ||:  81%|########  | 89/110 [01:03<00:15,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9452 ||:  82%|########1 | 90/110 [01:03<00:15,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9473 ||:  83%|########2 | 91/110 [01:04<00:14,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9471 ||:  84%|########3 | 92/110 [01:05<00:13,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9472 ||:  85%|########4 | 93/110 [01:06<00:13,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9464 ||:  85%|########5 | 94/110 [01:06<00:12,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9464 ||:  86%|########6 | 95/110 [01:07<00:11,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9469 ||:  87%|########7 | 96/110 [01:08<00:10,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9473 ||:  88%|########8 | 97/110 [01:09<00:10,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9454 ||:  89%|########9 | 98/110 [01:09<00:09,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9449 ||:  90%|######### | 99/110 [01:10<00:08,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9434 ||:  91%|######### | 100/110 [01:11<00:07,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9432 ||:  92%|#########1| 101/110 [01:12<00:06,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9424 ||:  93%|#########2| 102/110 [01:12<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9419 ||:  94%|#########3| 103/110 [01:13<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9414 ||:  95%|#########4| 104/110 [01:14<00:04,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9408 ||:  95%|#########5| 105/110 [01:15<00:03,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9406 ||:  96%|#########6| 106/110 [01:15<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9408 ||:  97%|#########7| 107/110 [01:16<00:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9413 ||:  98%|#########8| 108/110 [01:17<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9420 ||:  99%|#########9| 109/110 [01:18<00:00,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9418 ||: 100%|##########| 110/110 [01:18<00:00,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.9418 ||: 100%|##########| 110/110 [01:18<00:00,  1.40it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.2767, acc: 0.5312, no_result: 0.0625, loss: 0.8111 ||:   4%|4         | 1/24 [00:00<00:08,  2.82it/s]
+BLEU: 0.2907, acc: 0.5312, no_result: 0.1094, loss: 0.7803 ||:   8%|8         | 2/24 [00:00<00:07,  2.82it/s]
+BLEU: 0.2977, acc: 0.5208, no_result: 0.1250, loss: 0.8119 ||:  12%|#2        | 3/24 [00:01<00:07,  2.91it/s]
+BLEU: 0.3109, acc: 0.5000, no_result: 0.1641, loss: 0.8803 ||:  17%|#6        | 4/24 [00:01<00:07,  2.78it/s]
+BLEU: 0.3622, acc: 0.5250, no_result: 0.1625, loss: 0.8450 ||:  21%|##        | 5/24 [00:01<00:06,  2.74it/s]
+BLEU: 0.3678, acc: 0.5000, no_result: 0.1615, loss: 0.8685 ||:  25%|##5       | 6/24 [00:02<00:06,  2.86it/s]
+BLEU: 0.3527, acc: 0.5089, no_result: 0.1518, loss: 0.8569 ||:  29%|##9       | 7/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.3352, acc: 0.5078, no_result: 0.1523, loss: 0.8557 ||:  33%|###3      | 8/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.3443, acc: 0.5000, no_result: 0.1632, loss: 0.8540 ||:  38%|###7      | 9/24 [00:03<00:05,  2.91it/s]
+BLEU: 0.3537, acc: 0.4813, no_result: 0.1625, loss: 0.8548 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.3655, acc: 0.4716, no_result: 0.1648, loss: 0.8568 ||:  46%|####5     | 11/24 [00:03<00:04,  2.74it/s]
+BLEU: 0.3637, acc: 0.4948, no_result: 0.1536, loss: 0.8556 ||:  50%|#####     | 12/24 [00:04<00:04,  2.68it/s]
+BLEU: 0.3493, acc: 0.4832, no_result: 0.1538, loss: 0.8746 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.54it/s]
+BLEU: 0.3486, acc: 0.4799, no_result: 0.1473, loss: 0.8830 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.47it/s]
+BLEU: 0.3303, acc: 0.4646, no_result: 0.1729, loss: 0.9185 ||:  62%|######2   | 15/24 [00:05<00:03,  2.27it/s]
+BLEU: 0.3237, acc: 0.4492, no_result: 0.1797, loss: 0.9304 ||:  67%|######6   | 16/24 [00:06<00:03,  2.21it/s]
+BLEU: 0.3182, acc: 0.4412, no_result: 0.1949, loss: 0.9397 ||:  71%|#######   | 17/24 [00:06<00:03,  2.21it/s]
+BLEU: 0.3105, acc: 0.4392, no_result: 0.2014, loss: 0.9403 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.20it/s]
+BLEU: 0.3118, acc: 0.4424, no_result: 0.1957, loss: 0.9310 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.29it/s]
+BLEU: 0.3126, acc: 0.4375, no_result: 0.1906, loss: 0.9289 ||:  83%|########3 | 20/24 [00:07<00:01,  2.41it/s]
+BLEU: 0.3100, acc: 0.4286, no_result: 0.1860, loss: 0.9256 ||:  88%|########7 | 21/24 [00:08<00:01,  2.43it/s]
+BLEU: 0.3119, acc: 0.4205, no_result: 0.1847, loss: 0.9243 ||:  92%|#########1| 22/24 [00:08<00:00,  2.40it/s]
+BLEU: 0.3132, acc: 0.4253, no_result: 0.1807, loss: 0.9190 ||:  96%|#########5| 23/24 [00:09<00:00,  2.46it/s]
+BLEU: 0.3122, acc: 0.4168, no_result: 0.1732, loss: 0.9283 ||: 100%|##########| 24/24 [00:09<00:00,  3.05it/s]
+BLEU: 0.3122, acc: 0.4168, no_result: 0.1732, loss: 0.9283 ||: 100%|##########| 24/24 [00:09<00:00,  2.61it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8639 ||:   1%|          | 1/110 [00:00<01:27,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8742 ||:   2%|1         | 2/110 [00:01<01:32,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8453 ||:   3%|2         | 3/110 [00:02<01:26,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8308 ||:   4%|3         | 4/110 [00:03<01:20,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8573 ||:   5%|4         | 5/110 [00:03<01:21,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8485 ||:   5%|5         | 6/110 [00:04<01:23,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8535 ||:   6%|6         | 7/110 [00:05<01:22,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8464 ||:   7%|7         | 8/110 [00:06<01:21,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8535 ||:   8%|8         | 9/110 [00:07<01:15,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8604 ||:   9%|9         | 10/110 [00:07<01:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8509 ||:  10%|#         | 11/110 [00:08<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8498 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8619 ||:  12%|#1        | 13/110 [00:09<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8685 ||:  13%|#2        | 14/110 [00:10<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8671 ||:  14%|#3        | 15/110 [00:11<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8689 ||:  15%|#4        | 16/110 [00:11<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8616 ||:  15%|#5        | 17/110 [00:12<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8702 ||:  16%|#6        | 18/110 [00:13<01:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8796 ||:  17%|#7        | 19/110 [00:13<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8795 ||:  18%|#8        | 20/110 [00:14<01:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8774 ||:  19%|#9        | 21/110 [00:15<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8751 ||:  20%|##        | 22/110 [00:15<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8718 ||:  21%|##        | 23/110 [00:16<01:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8725 ||:  22%|##1       | 24/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8756 ||:  23%|##2       | 25/110 [00:18<00:59,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8791 ||:  24%|##3       | 26/110 [00:18<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8775 ||:  25%|##4       | 27/110 [00:19<01:02,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8804 ||:  25%|##5       | 28/110 [00:20<00:59,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8755 ||:  26%|##6       | 29/110 [00:21<01:03,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8745 ||:  27%|##7       | 30/110 [00:21<01:02,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8762 ||:  28%|##8       | 31/110 [00:22<00:59,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8815 ||:  29%|##9       | 32/110 [00:23<00:59,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8824 ||:  30%|###       | 33/110 [00:24<01:03,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8823 ||:  31%|###       | 34/110 [00:25<00:59,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8785 ||:  32%|###1      | 35/110 [00:25<00:55,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8803 ||:  33%|###2      | 36/110 [00:26<00:56,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8839 ||:  34%|###3      | 37/110 [00:27<00:55,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8830 ||:  35%|###4      | 38/110 [00:28<00:53,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8852 ||:  35%|###5      | 39/110 [00:28<00:51,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8857 ||:  36%|###6      | 40/110 [00:29<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8874 ||:  37%|###7      | 41/110 [00:30<00:50,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8893 ||:  38%|###8      | 42/110 [00:30<00:49,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8878 ||:  39%|###9      | 43/110 [00:31<00:46,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8926 ||:  40%|####      | 44/110 [00:32<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8951 ||:  41%|####      | 45/110 [00:32<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8954 ||:  42%|####1     | 46/110 [00:33<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8934 ||:  43%|####2     | 47/110 [00:34<00:44,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8930 ||:  44%|####3     | 48/110 [00:35<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8919 ||:  45%|####4     | 49/110 [00:35<00:43,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8901 ||:  45%|####5     | 50/110 [00:36<00:42,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8895 ||:  46%|####6     | 51/110 [00:37<00:42,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8889 ||:  47%|####7     | 52/110 [00:38<00:44,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8920 ||:  48%|####8     | 53/110 [00:38<00:44,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8905 ||:  49%|####9     | 54/110 [00:39<00:41,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8914 ||:  50%|#####     | 55/110 [00:40<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8893 ||:  51%|#####     | 56/110 [00:40<00:37,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8893 ||:  52%|#####1    | 57/110 [00:41<00:36,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8865 ||:  53%|#####2    | 58/110 [00:42<00:37,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8860 ||:  54%|#####3    | 59/110 [00:43<00:37,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8866 ||:  55%|#####4    | 60/110 [00:44<00:48,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8854 ||:  55%|#####5    | 61/110 [00:45<00:43,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8846 ||:  56%|#####6    | 62/110 [00:46<00:39,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8840 ||:  57%|#####7    | 63/110 [00:46<00:37,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8840 ||:  58%|#####8    | 64/110 [00:47<00:35,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8847 ||:  59%|#####9    | 65/110 [00:48<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8862 ||:  60%|######    | 66/110 [00:48<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8843 ||:  61%|######    | 67/110 [00:49<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8858 ||:  62%|######1   | 68/110 [00:50<00:29,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8863 ||:  63%|######2   | 69/110 [00:50<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8870 ||:  64%|######3   | 70/110 [00:51<00:25,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8858 ||:  65%|######4   | 71/110 [00:52<00:24,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8856 ||:  65%|######5   | 72/110 [00:52<00:23,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8874 ||:  66%|######6   | 73/110 [00:53<00:23,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8873 ||:  67%|######7   | 74/110 [00:54<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8871 ||:  68%|######8   | 75/110 [00:54<00:22,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8869 ||:  69%|######9   | 76/110 [00:55<00:22,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8874 ||:  70%|#######   | 77/110 [00:56<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8866 ||:  71%|#######   | 78/110 [00:56<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8853 ||:  72%|#######1  | 79/110 [00:57<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8872 ||:  73%|#######2  | 80/110 [00:58<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8877 ||:  74%|#######3  | 81/110 [00:58<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8865 ||:  75%|#######4  | 82/110 [00:59<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8868 ||:  75%|#######5  | 83/110 [00:59<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8856 ||:  76%|#######6  | 84/110 [01:00<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8860 ||:  77%|#######7  | 85/110 [01:01<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8890 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8883 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8899 ||:  80%|########  | 88/110 [01:03<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8888 ||:  81%|########  | 89/110 [01:04<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8900 ||:  82%|########1 | 90/110 [01:04<00:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8902 ||:  83%|########2 | 91/110 [01:05<00:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8917 ||:  84%|########3 | 92/110 [01:06<00:14,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8904 ||:  85%|########4 | 93/110 [01:07<00:13,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8904 ||:  85%|########5 | 94/110 [01:08<00:12,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8907 ||:  86%|########6 | 95/110 [01:08<00:11,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8904 ||:  87%|########7 | 96/110 [01:09<00:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8916 ||:  88%|########8 | 97/110 [01:10<00:09,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8909 ||:  89%|########9 | 98/110 [01:10<00:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8903 ||:  90%|######### | 99/110 [01:11<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8923 ||:  91%|######### | 100/110 [01:12<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8928 ||:  92%|#########1| 101/110 [01:13<00:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8942 ||:  93%|#########2| 102/110 [01:13<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8945 ||:  94%|#########3| 103/110 [01:14<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8939 ||:  95%|#########4| 104/110 [01:15<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8948 ||:  95%|#########5| 105/110 [01:15<00:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8940 ||:  96%|#########6| 106/110 [01:16<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8939 ||:  97%|#########7| 107/110 [01:17<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8946 ||:  98%|#########8| 108/110 [01:17<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8947 ||:  99%|#########9| 109/110 [01:18<00:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8954 ||: 100%|##########| 110/110 [01:18<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8954 ||: 100%|##########| 110/110 [01:18<00:00,  1.39it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3606, acc: 0.5625, no_result: 0.0625, loss: 0.7655 ||:   4%|4         | 1/24 [00:00<00:08,  2.59it/s]
+BLEU: 0.3900, acc: 0.5312, no_result: 0.0625, loss: 0.7271 ||:   8%|8         | 2/24 [00:00<00:08,  2.58it/s]
+BLEU: 0.3485, acc: 0.5417, no_result: 0.0833, loss: 0.7634 ||:  12%|#2        | 3/24 [00:01<00:08,  2.59it/s]
+BLEU: 0.3455, acc: 0.5234, no_result: 0.1328, loss: 0.8406 ||:  17%|#6        | 4/24 [00:01<00:08,  2.44it/s]
+BLEU: 0.3850, acc: 0.5437, no_result: 0.1313, loss: 0.8217 ||:  21%|##        | 5/24 [00:02<00:07,  2.45it/s]
+BLEU: 0.3935, acc: 0.5156, no_result: 0.1250, loss: 0.8511 ||:  25%|##5       | 6/24 [00:02<00:07,  2.56it/s]
+BLEU: 0.3813, acc: 0.5179, no_result: 0.1295, loss: 0.8330 ||:  29%|##9       | 7/24 [00:02<00:06,  2.63it/s]
+BLEU: 0.3638, acc: 0.5234, no_result: 0.1289, loss: 0.8300 ||:  33%|###3      | 8/24 [00:03<00:06,  2.66it/s]
+BLEU: 0.3759, acc: 0.5069, no_result: 0.1458, loss: 0.8313 ||:  38%|###7      | 9/24 [00:03<00:05,  2.58it/s]
+BLEU: 0.3790, acc: 0.4844, no_result: 0.1406, loss: 0.8360 ||:  42%|####1     | 10/24 [00:03<00:05,  2.54it/s]
+BLEU: 0.3866, acc: 0.4801, no_result: 0.1420, loss: 0.8420 ||:  46%|####5     | 11/24 [00:04<00:05,  2.44it/s]
+BLEU: 0.3901, acc: 0.4974, no_result: 0.1354, loss: 0.8401 ||:  50%|#####     | 12/24 [00:04<00:04,  2.51it/s]
+BLEU: 0.3683, acc: 0.4808, no_result: 0.1442, loss: 0.8572 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.40it/s]
+BLEU: 0.3663, acc: 0.4844, no_result: 0.1406, loss: 0.8642 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.37it/s]
+BLEU: 0.3463, acc: 0.4688, no_result: 0.1562, loss: 0.8946 ||:  62%|######2   | 15/24 [00:06<00:04,  2.23it/s]
+BLEU: 0.3339, acc: 0.4531, no_result: 0.1602, loss: 0.9082 ||:  67%|######6   | 16/24 [00:06<00:03,  2.20it/s]
+BLEU: 0.3255, acc: 0.4430, no_result: 0.1728, loss: 0.9177 ||:  71%|#######   | 17/24 [00:07<00:03,  2.22it/s]
+BLEU: 0.3181, acc: 0.4427, no_result: 0.1788, loss: 0.9162 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.20it/s]
+BLEU: 0.3211, acc: 0.4474, no_result: 0.1743, loss: 0.9054 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.29it/s]
+BLEU: 0.3253, acc: 0.4484, no_result: 0.1688, loss: 0.9027 ||:  83%|########3 | 20/24 [00:08<00:01,  2.40it/s]
+BLEU: 0.3214, acc: 0.4390, no_result: 0.1652, loss: 0.8997 ||:  88%|########7 | 21/24 [00:08<00:01,  2.40it/s]
+BLEU: 0.3234, acc: 0.4375, no_result: 0.1619, loss: 0.9001 ||:  92%|#########1| 22/24 [00:09<00:00,  2.38it/s]
+BLEU: 0.3249, acc: 0.4457, no_result: 0.1576, loss: 0.8926 ||:  96%|#########5| 23/24 [00:09<00:00,  2.46it/s]
+BLEU: 0.3238, acc: 0.4317, no_result: 0.1557, loss: 0.9082 ||: 100%|##########| 24/24 [00:09<00:00,  3.03it/s]
+BLEU: 0.3238, acc: 0.4317, no_result: 0.1557, loss: 0.9082 ||: 100%|##########| 24/24 [00:09<00:00,  2.49it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8919 ||:   1%|          | 1/110 [00:00<01:25,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8829 ||:   2%|1         | 2/110 [00:01<01:23,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8717 ||:   3%|2         | 3/110 [00:02<01:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8532 ||:   4%|3         | 4/110 [00:02<01:16,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8580 ||:   5%|4         | 5/110 [00:03<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8444 ||:   5%|5         | 6/110 [00:04<01:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8279 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8330 ||:   7%|7         | 8/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8406 ||:   8%|8         | 9/110 [00:06<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8446 ||:   9%|9         | 10/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8435 ||:  10%|#         | 11/110 [00:07<01:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8377 ||:  11%|#         | 12/110 [00:08<01:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8385 ||:  12%|#1        | 13/110 [00:08<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8445 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8471 ||:  14%|#3        | 15/110 [00:10<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8363 ||:  15%|#4        | 16/110 [00:11<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8338 ||:  15%|#5        | 17/110 [00:11<01:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8345 ||:  16%|#6        | 18/110 [00:12<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8390 ||:  17%|#7        | 19/110 [00:13<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8383 ||:  18%|#8        | 20/110 [00:14<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8403 ||:  19%|#9        | 21/110 [00:14<01:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8346 ||:  20%|##        | 22/110 [00:15<00:58,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8329 ||:  21%|##        | 23/110 [00:15<00:58,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8314 ||:  22%|##1       | 24/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8334 ||:  23%|##2       | 25/110 [00:17<00:55,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8330 ||:  24%|##3       | 26/110 [00:17<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8319 ||:  25%|##4       | 27/110 [00:18<00:55,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8315 ||:  25%|##5       | 28/110 [00:19<00:55,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8347 ||:  26%|##6       | 29/110 [00:20<00:59,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8404 ||:  27%|##7       | 30/110 [00:20<01:00,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8381 ||:  28%|##8       | 31/110 [00:21<00:57,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8407 ||:  29%|##9       | 32/110 [00:22<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8408 ||:  30%|###       | 33/110 [00:22<00:52,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8398 ||:  31%|###       | 34/110 [00:23<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8407 ||:  32%|###1      | 35/110 [00:24<00:50,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8391 ||:  33%|###2      | 36/110 [00:24<00:50,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8401 ||:  34%|###3      | 37/110 [00:25<00:48,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8374 ||:  35%|###4      | 38/110 [00:26<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8349 ||:  35%|###5      | 39/110 [00:26<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8350 ||:  36%|###6      | 40/110 [00:27<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8356 ||:  37%|###7      | 41/110 [00:28<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8371 ||:  38%|###8      | 42/110 [00:28<00:46,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8398 ||:  39%|###9      | 43/110 [00:29<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8382 ||:  40%|####      | 44/110 [00:30<00:45,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8415 ||:  41%|####      | 45/110 [00:31<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8408 ||:  42%|####1     | 46/110 [00:31<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8431 ||:  43%|####2     | 47/110 [00:32<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8442 ||:  44%|####3     | 48/110 [00:33<00:43,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8426 ||:  45%|####4     | 49/110 [00:33<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8422 ||:  45%|####5     | 50/110 [00:35<00:53,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8451 ||:  46%|####6     | 51/110 [00:35<00:49,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8445 ||:  47%|####7     | 52/110 [00:36<00:48,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8445 ||:  48%|####8     | 53/110 [00:37<00:47,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8453 ||:  49%|####9     | 54/110 [00:38<00:46,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8440 ||:  50%|#####     | 55/110 [00:38<00:42,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8412 ||:  51%|#####     | 56/110 [00:39<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8409 ||:  52%|#####1    | 57/110 [00:40<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8417 ||:  53%|#####2    | 58/110 [00:41<00:37,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8394 ||:  54%|#####3    | 59/110 [00:41<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8393 ||:  55%|#####4    | 60/110 [00:42<00:37,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8400 ||:  55%|#####5    | 61/110 [00:43<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8413 ||:  56%|#####6    | 62/110 [00:43<00:33,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8424 ||:  57%|#####7    | 63/110 [00:44<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8424 ||:  58%|#####8    | 64/110 [00:45<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8413 ||:  59%|#####9    | 65/110 [00:45<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8411 ||:  60%|######    | 66/110 [00:46<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8441 ||:  61%|######    | 67/110 [00:47<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8438 ||:  62%|######1   | 68/110 [00:47<00:28,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8417 ||:  63%|######2   | 69/110 [00:48<00:26,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8408 ||:  64%|######3   | 70/110 [00:49<00:25,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8427 ||:  65%|######4   | 71/110 [00:49<00:25,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8422 ||:  65%|######5   | 72/110 [00:50<00:25,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8423 ||:  66%|######6   | 73/110 [00:51<00:24,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8433 ||:  67%|######7   | 74/110 [00:51<00:23,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8420 ||:  68%|######8   | 75/110 [00:52<00:22,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8417 ||:  69%|######9   | 76/110 [00:52<00:21,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8430 ||:  70%|#######   | 77/110 [00:53<00:21,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8435 ||:  71%|#######   | 78/110 [00:54<00:20,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8445 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8447 ||:  73%|#######2  | 80/110 [00:55<00:22,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8442 ||:  74%|#######3  | 81/110 [00:56<00:21,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8438 ||:  75%|#######4  | 82/110 [00:57<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8456 ||:  75%|#######5  | 83/110 [00:57<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8461 ||:  76%|#######6  | 84/110 [00:58<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8464 ||:  77%|#######7  | 85/110 [00:59<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8476 ||:  78%|#######8  | 86/110 [01:00<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8483 ||:  79%|#######9  | 87/110 [01:00<00:17,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8472 ||:  80%|########  | 88/110 [01:01<00:16,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8471 ||:  81%|########  | 89/110 [01:02<00:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8470 ||:  82%|########1 | 90/110 [01:03<00:14,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8468 ||:  83%|########2 | 91/110 [01:03<00:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8472 ||:  84%|########3 | 92/110 [01:04<00:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8456 ||:  85%|########4 | 93/110 [01:05<00:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8473 ||:  85%|########5 | 94/110 [01:05<00:11,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8461 ||:  86%|########6 | 95/110 [01:06<00:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8463 ||:  87%|########7 | 96/110 [01:07<00:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8469 ||:  88%|########8 | 97/110 [01:07<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8478 ||:  89%|########9 | 98/110 [01:08<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8475 ||:  90%|######### | 99/110 [01:09<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8475 ||:  91%|######### | 100/110 [01:09<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8503 ||:  92%|#########1| 101/110 [01:10<00:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8510 ||:  93%|#########2| 102/110 [01:11<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8507 ||:  94%|#########3| 103/110 [01:12<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8499 ||:  95%|#########4| 104/110 [01:12<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8486 ||:  95%|#########5| 105/110 [01:13<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8471 ||:  96%|#########6| 106/110 [01:14<00:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8451 ||:  97%|#########7| 107/110 [01:14<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8443 ||:  98%|#########8| 108/110 [01:15<00:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8439 ||:  99%|#########9| 109/110 [01:16<00:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8448 ||: 100%|##########| 110/110 [01:16<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8448 ||: 100%|##########| 110/110 [01:16<00:00,  1.44it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3848, acc: 0.5312, no_result: 0.0938, loss: 0.7383 ||:   4%|4         | 1/24 [00:00<00:07,  2.91it/s]
+BLEU: 0.3922, acc: 0.5469, no_result: 0.1250, loss: 0.6946 ||:   8%|8         | 2/24 [00:00<00:07,  2.95it/s]
+BLEU: 0.3673, acc: 0.5312, no_result: 0.1250, loss: 0.7435 ||:  12%|#2        | 3/24 [00:00<00:06,  3.03it/s]
+BLEU: 0.3576, acc: 0.5078, no_result: 0.1562, loss: 0.8031 ||:  17%|#6        | 4/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.4021, acc: 0.5375, no_result: 0.1500, loss: 0.7637 ||:  21%|##        | 5/24 [00:01<00:06,  2.82it/s]
+BLEU: 0.4013, acc: 0.5104, no_result: 0.1406, loss: 0.7871 ||:  25%|##5       | 6/24 [00:02<00:06,  2.96it/s]
+BLEU: 0.3894, acc: 0.5089, no_result: 0.1295, loss: 0.7781 ||:  29%|##9       | 7/24 [00:02<00:05,  3.07it/s]
+BLEU: 0.3714, acc: 0.5156, no_result: 0.1211, loss: 0.7815 ||:  33%|###3      | 8/24 [00:02<00:05,  3.10it/s]
+BLEU: 0.3841, acc: 0.5174, no_result: 0.1146, loss: 0.7773 ||:  38%|###7      | 9/24 [00:02<00:04,  3.07it/s]
+BLEU: 0.3927, acc: 0.5031, no_result: 0.1125, loss: 0.7806 ||:  42%|####1     | 10/24 [00:03<00:04,  3.01it/s]
+BLEU: 0.3958, acc: 0.4915, no_result: 0.1136, loss: 0.7885 ||:  46%|####5     | 11/24 [00:03<00:04,  2.88it/s]
+BLEU: 0.3997, acc: 0.5104, no_result: 0.1068, loss: 0.7875 ||:  50%|#####     | 12/24 [00:04<00:04,  2.98it/s]
+BLEU: 0.3862, acc: 0.4976, no_result: 0.1106, loss: 0.8100 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.87it/s]
+BLEU: 0.3880, acc: 0.4978, no_result: 0.1094, loss: 0.8169 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.80it/s]
+BLEU: 0.3670, acc: 0.4792, no_result: 0.1354, loss: 0.8483 ||:  62%|######2   | 15/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.3561, acc: 0.4629, no_result: 0.1406, loss: 0.8587 ||:  67%|######6   | 16/24 [00:05<00:03,  2.55it/s]
+BLEU: 0.3493, acc: 0.4577, no_result: 0.1489, loss: 0.8706 ||:  71%|#######   | 17/24 [00:06<00:02,  2.59it/s]
+BLEU: 0.3427, acc: 0.4583, no_result: 0.1528, loss: 0.8752 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.56it/s]
+BLEU: 0.3462, acc: 0.4589, no_result: 0.1497, loss: 0.8645 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.64it/s]
+BLEU: 0.3516, acc: 0.4578, no_result: 0.1453, loss: 0.8641 ||:  83%|########3 | 20/24 [00:07<00:01,  2.76it/s]
+BLEU: 0.3521, acc: 0.4524, no_result: 0.1399, loss: 0.8607 ||:  88%|########7 | 21/24 [00:07<00:01,  2.78it/s]
+BLEU: 0.3527, acc: 0.4446, no_result: 0.1392, loss: 0.8592 ||:  92%|#########1| 22/24 [00:07<00:00,  2.75it/s]
+BLEU: 0.3534, acc: 0.4524, no_result: 0.1372, loss: 0.8528 ||:  96%|#########5| 23/24 [00:08<00:00,  2.84it/s]
+BLEU: 0.3522, acc: 0.4429, no_result: 0.1315, loss: 0.8661 ||: 100%|##########| 24/24 [00:08<00:00,  3.53it/s]
+BLEU: 0.3522, acc: 0.4429, no_result: 0.1315, loss: 0.8661 ||: 100%|##########| 24/24 [00:08<00:00,  2.90it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7159 ||:   1%|          | 1/110 [00:00<01:07,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6708 ||:   2%|1         | 2/110 [00:01<01:24,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7161 ||:   3%|2         | 3/110 [00:02<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7516 ||:   4%|3         | 4/110 [00:02<01:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7834 ||:   5%|4         | 5/110 [00:03<01:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8046 ||:   5%|5         | 6/110 [00:04<01:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8206 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8071 ||:   7%|7         | 8/110 [00:05<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8082 ||:   8%|8         | 9/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8105 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8123 ||:  10%|#         | 11/110 [00:07<01:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8025 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8090 ||:  12%|#1        | 13/110 [00:08<01:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8176 ||:  13%|#2        | 14/110 [00:09<01:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8125 ||:  14%|#3        | 15/110 [00:10<01:00,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8141 ||:  15%|#4        | 16/110 [00:10<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8173 ||:  15%|#5        | 17/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8148 ||:  16%|#6        | 18/110 [00:12<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8168 ||:  17%|#7        | 19/110 [00:12<00:59,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8108 ||:  18%|#8        | 20/110 [00:13<00:57,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8047 ||:  19%|#9        | 21/110 [00:13<00:57,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8057 ||:  20%|##        | 22/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8110 ||:  21%|##        | 23/110 [00:15<01:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8115 ||:  22%|##1       | 24/110 [00:16<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8146 ||:  23%|##2       | 25/110 [00:16<00:59,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8157 ||:  24%|##3       | 26/110 [00:17<01:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8140 ||:  25%|##4       | 27/110 [00:18<00:57,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8154 ||:  25%|##5       | 28/110 [00:19<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8160 ||:  26%|##6       | 29/110 [00:19<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8211 ||:  27%|##7       | 30/110 [00:20<00:52,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8181 ||:  28%|##8       | 31/110 [00:20<00:51,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8177 ||:  29%|##9       | 32/110 [00:21<00:50,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8179 ||:  30%|###       | 33/110 [00:22<00:49,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8157 ||:  31%|###       | 34/110 [00:22<00:47,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8139 ||:  32%|###1      | 35/110 [00:23<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8151 ||:  33%|###2      | 36/110 [00:24<00:52,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8166 ||:  34%|###3      | 37/110 [00:25<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8170 ||:  35%|###4      | 38/110 [00:25<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8142 ||:  35%|###5      | 39/110 [00:26<00:47,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8137 ||:  36%|###6      | 40/110 [00:27<01:03,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8125 ||:  37%|###7      | 41/110 [00:28<00:56,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8156 ||:  38%|###8      | 42/110 [00:29<00:52,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8188 ||:  39%|###9      | 43/110 [00:29<00:50,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8197 ||:  40%|####      | 44/110 [00:30<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8153 ||:  41%|####      | 45/110 [00:31<00:45,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8148 ||:  42%|####1     | 46/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8136 ||:  43%|####2     | 47/110 [00:32<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8102 ||:  44%|####3     | 48/110 [00:32<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8120 ||:  45%|####4     | 49/110 [00:33<00:42,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8094 ||:  45%|####5     | 50/110 [00:34<00:40,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8103 ||:  46%|####6     | 51/110 [00:35<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8090 ||:  47%|####7     | 52/110 [00:35<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8106 ||:  48%|####8     | 53/110 [00:36<00:37,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8095 ||:  49%|####9     | 54/110 [00:36<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8092 ||:  50%|#####     | 55/110 [00:37<00:35,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8077 ||:  51%|#####     | 56/110 [00:38<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8069 ||:  52%|#####1    | 57/110 [00:38<00:34,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8066 ||:  53%|#####2    | 58/110 [00:39<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8067 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8042 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8044 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8029 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8027 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8013 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8009 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7990 ||:  60%|######    | 66/110 [00:45<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7985 ||:  61%|######    | 67/110 [00:45<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7988 ||:  62%|######1   | 68/110 [00:46<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7988 ||:  63%|######2   | 69/110 [00:47<00:31,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8005 ||:  64%|######3   | 70/110 [00:48<00:29,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8004 ||:  65%|######4   | 71/110 [00:48<00:28,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7993 ||:  65%|######5   | 72/110 [00:49<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7981 ||:  66%|######6   | 73/110 [00:50<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7981 ||:  67%|######7   | 74/110 [00:50<00:24,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7985 ||:  68%|######8   | 75/110 [00:51<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7978 ||:  69%|######9   | 76/110 [00:51<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7978 ||:  70%|#######   | 77/110 [00:52<00:21,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7975 ||:  71%|#######   | 78/110 [00:53<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7967 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7986 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8000 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8009 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8021 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8025 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8019 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8031 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8025 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8021 ||:  80%|########  | 88/110 [01:00<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8030 ||:  81%|########  | 89/110 [01:00<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8021 ||:  82%|########1 | 90/110 [01:01<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8018 ||:  83%|########2 | 91/110 [01:02<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8025 ||:  84%|########3 | 92/110 [01:02<00:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8026 ||:  85%|########4 | 93/110 [01:03<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8034 ||:  85%|########5 | 94/110 [01:04<00:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8033 ||:  86%|########6 | 95/110 [01:04<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8039 ||:  87%|########7 | 96/110 [01:05<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8028 ||:  88%|########8 | 97/110 [01:06<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8043 ||:  89%|########9 | 98/110 [01:06<00:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8037 ||:  90%|######### | 99/110 [01:07<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8017 ||:  91%|######### | 100/110 [01:08<00:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8021 ||:  92%|#########1| 101/110 [01:08<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8024 ||:  93%|#########2| 102/110 [01:09<00:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8043 ||:  94%|#########3| 103/110 [01:10<00:04,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8029 ||:  95%|#########4| 104/110 [01:10<00:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8027 ||:  95%|#########5| 105/110 [01:11<00:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8020 ||:  96%|#########6| 106/110 [01:12<00:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8038 ||:  97%|#########7| 107/110 [01:12<00:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8052 ||:  98%|#########8| 108/110 [01:13<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8053 ||:  99%|#########9| 109/110 [01:14<00:00,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8050 ||: 100%|##########| 110/110 [01:14<00:00,  1.80it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.8050 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3576, acc: 0.5312, no_result: 0.1250, loss: 0.6862 ||:   4%|4         | 1/24 [00:00<00:08,  2.79it/s]
+BLEU: 0.3717, acc: 0.5469, no_result: 0.0938, loss: 0.6549 ||:   8%|8         | 2/24 [00:00<00:07,  2.90it/s]
+BLEU: 0.3493, acc: 0.5104, no_result: 0.1146, loss: 0.7132 ||:  12%|#2        | 3/24 [00:01<00:07,  2.98it/s]
+BLEU: 0.3704, acc: 0.4688, no_result: 0.1562, loss: 0.7820 ||:  17%|#6        | 4/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.4131, acc: 0.5062, no_result: 0.1437, loss: 0.7427 ||:  21%|##        | 5/24 [00:01<00:06,  2.80it/s]
+BLEU: 0.4100, acc: 0.4948, no_result: 0.1406, loss: 0.7538 ||:  25%|##5       | 6/24 [00:02<00:06,  2.93it/s]
+BLEU: 0.3968, acc: 0.5045, no_result: 0.1295, loss: 0.7482 ||:  29%|##9       | 7/24 [00:02<00:05,  3.05it/s]
+BLEU: 0.3811, acc: 0.5078, no_result: 0.1289, loss: 0.7532 ||:  33%|###3      | 8/24 [00:02<00:05,  3.05it/s]
+BLEU: 0.3952, acc: 0.5104, no_result: 0.1215, loss: 0.7476 ||:  38%|###7      | 9/24 [00:03<00:04,  3.02it/s]
+BLEU: 0.4034, acc: 0.4938, no_result: 0.1156, loss: 0.7531 ||:  42%|####1     | 10/24 [00:03<00:04,  2.95it/s]
+BLEU: 0.4068, acc: 0.4858, no_result: 0.1136, loss: 0.7619 ||:  46%|####5     | 11/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.4090, acc: 0.4974, no_result: 0.1120, loss: 0.7607 ||:  50%|#####     | 12/24 [00:04<00:04,  2.93it/s]
+BLEU: 0.3918, acc: 0.4784, no_result: 0.1202, loss: 0.7827 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.80it/s]
+BLEU: 0.3929, acc: 0.4754, no_result: 0.1205, loss: 0.7895 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.74it/s]
+BLEU: 0.3695, acc: 0.4604, no_result: 0.1437, loss: 0.8212 ||:  62%|######2   | 15/24 [00:05<00:03,  2.54it/s]
+BLEU: 0.3551, acc: 0.4434, no_result: 0.1484, loss: 0.8337 ||:  67%|######6   | 16/24 [00:05<00:03,  2.51it/s]
+BLEU: 0.3468, acc: 0.4449, no_result: 0.1544, loss: 0.8418 ||:  71%|#######   | 17/24 [00:06<00:02,  2.56it/s]
+BLEU: 0.3422, acc: 0.4444, no_result: 0.1545, loss: 0.8428 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.54it/s]
+BLEU: 0.3448, acc: 0.4490, no_result: 0.1480, loss: 0.8324 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.64it/s]
+BLEU: 0.3504, acc: 0.4500, no_result: 0.1437, loss: 0.8334 ||:  83%|########3 | 20/24 [00:07<00:01,  2.78it/s]
+BLEU: 0.3485, acc: 0.4420, no_result: 0.1399, loss: 0.8311 ||:  88%|########7 | 21/24 [00:07<00:01,  2.76it/s]
+BLEU: 0.3496, acc: 0.4361, no_result: 0.1392, loss: 0.8315 ||:  92%|#########1| 22/24 [00:07<00:00,  2.72it/s]
+BLEU: 0.3497, acc: 0.4443, no_result: 0.1359, loss: 0.8248 ||:  96%|#########5| 23/24 [00:08<00:00,  2.80it/s]
+BLEU: 0.3484, acc: 0.4443, no_result: 0.1302, loss: 0.8394 ||: 100%|##########| 24/24 [00:08<00:00,  3.49it/s]
+BLEU: 0.3484, acc: 0.4443, no_result: 0.1302, loss: 0.8394 ||: 100%|##########| 24/24 [00:08<00:00,  2.86it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7062 ||:   1%|          | 1/110 [00:00<01:24,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7140 ||:   2%|1         | 2/110 [00:01<01:25,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7372 ||:   3%|2         | 3/110 [00:02<01:21,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7155 ||:   4%|3         | 4/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7297 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7154 ||:   5%|5         | 6/110 [00:04<01:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7188 ||:   6%|6         | 7/110 [00:04<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7334 ||:   7%|7         | 8/110 [00:05<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7379 ||:   8%|8         | 9/110 [00:06<01:04,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7328 ||:   9%|9         | 10/110 [00:06<01:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7315 ||:  10%|#         | 11/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7317 ||:  11%|#         | 12/110 [00:08<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7296 ||:  12%|#1        | 13/110 [00:08<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7215 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7280 ||:  14%|#3        | 15/110 [00:10<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7250 ||:  15%|#4        | 16/110 [00:10<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7211 ||:  15%|#5        | 17/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7278 ||:  16%|#6        | 18/110 [00:12<00:59,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7245 ||:  17%|#7        | 19/110 [00:12<00:58,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7233 ||:  18%|#8        | 20/110 [00:13<01:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7205 ||:  19%|#9        | 21/110 [00:14<00:58,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7248 ||:  20%|##        | 22/110 [00:14<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7230 ||:  21%|##        | 23/110 [00:15<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7287 ||:  22%|##1       | 24/110 [00:16<00:55,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7308 ||:  23%|##2       | 25/110 [00:16<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7381 ||:  24%|##3       | 26/110 [00:17<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7406 ||:  25%|##4       | 27/110 [00:18<00:52,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7425 ||:  25%|##5       | 28/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7456 ||:  26%|##6       | 29/110 [00:19<00:52,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7469 ||:  27%|##7       | 30/110 [00:20<01:11,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7521 ||:  28%|##8       | 31/110 [00:21<01:03,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7532 ||:  29%|##9       | 32/110 [00:22<00:59,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7541 ||:  30%|###       | 33/110 [00:22<00:56,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7561 ||:  31%|###       | 34/110 [00:23<00:54,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7563 ||:  32%|###1      | 35/110 [00:24<00:56,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7549 ||:  33%|###2      | 36/110 [00:24<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7535 ||:  34%|###3      | 37/110 [00:25<00:55,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7557 ||:  35%|###4      | 38/110 [00:26<00:56,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7587 ||:  35%|###5      | 39/110 [00:27<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7563 ||:  36%|###6      | 40/110 [00:27<00:48,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7574 ||:  37%|###7      | 41/110 [00:28<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7572 ||:  38%|###8      | 42/110 [00:29<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7592 ||:  39%|###9      | 43/110 [00:29<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7597 ||:  40%|####      | 44/110 [00:30<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7569 ||:  41%|####      | 45/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7560 ||:  42%|####1     | 46/110 [00:31<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7547 ||:  43%|####2     | 47/110 [00:32<00:45,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7567 ||:  44%|####3     | 48/110 [00:33<00:43,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7574 ||:  45%|####4     | 49/110 [00:34<00:44,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7579 ||:  45%|####5     | 50/110 [00:34<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7573 ||:  46%|####6     | 51/110 [00:35<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7575 ||:  47%|####7     | 52/110 [00:35<00:36,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7612 ||:  48%|####8     | 53/110 [00:36<00:37,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7622 ||:  49%|####9     | 54/110 [00:37<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7623 ||:  50%|#####     | 55/110 [00:37<00:34,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7621 ||:  51%|#####     | 56/110 [00:38<00:34,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7612 ||:  52%|#####1    | 57/110 [00:39<00:32,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7606 ||:  53%|#####2    | 58/110 [00:39<00:32,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7605 ||:  54%|#####3    | 59/110 [00:40<00:31,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7600 ||:  55%|#####4    | 60/110 [00:40<00:31,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7598 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7584 ||:  56%|#####6    | 62/110 [00:42<00:30,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7616 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7615 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7614 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7614 ||:  60%|######    | 66/110 [00:45<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7597 ||:  61%|######    | 67/110 [00:45<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7606 ||:  62%|######1   | 68/110 [00:46<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7595 ||:  63%|######2   | 69/110 [00:47<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7600 ||:  64%|######3   | 70/110 [00:47<00:26,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7617 ||:  65%|######4   | 71/110 [00:48<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7609 ||:  65%|######5   | 72/110 [00:49<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7604 ||:  66%|######6   | 73/110 [00:49<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7590 ||:  67%|######7   | 74/110 [00:50<00:23,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7582 ||:  68%|######8   | 75/110 [00:50<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7591 ||:  69%|######9   | 76/110 [00:51<00:21,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7596 ||:  70%|#######   | 77/110 [00:52<00:21,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7618 ||:  71%|#######   | 78/110 [00:52<00:20,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7637 ||:  72%|#######1  | 79/110 [00:53<00:19,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7640 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7641 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7647 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7662 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7668 ||:  76%|#######6  | 84/110 [00:56<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7679 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7674 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7678 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7665 ||:  80%|########  | 88/110 [00:59<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7664 ||:  81%|########  | 89/110 [01:00<00:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7683 ||:  82%|########1 | 90/110 [01:00<00:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7672 ||:  83%|########2 | 91/110 [01:01<00:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7674 ||:  84%|########3 | 92/110 [01:02<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7682 ||:  85%|########4 | 93/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7685 ||:  85%|########5 | 94/110 [01:03<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7690 ||:  86%|########6 | 95/110 [01:04<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7687 ||:  87%|########7 | 96/110 [01:04<00:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7701 ||:  88%|########8 | 97/110 [01:05<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7711 ||:  89%|########9 | 98/110 [01:06<00:08,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7709 ||:  90%|######### | 99/110 [01:07<00:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7712 ||:  91%|######### | 100/110 [01:07<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7717 ||:  92%|#########1| 101/110 [01:08<00:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7716 ||:  93%|#########2| 102/110 [01:09<00:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7710 ||:  94%|#########3| 103/110 [01:09<00:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7724 ||:  95%|#########4| 104/110 [01:10<00:04,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7722 ||:  95%|#########5| 105/110 [01:11<00:03,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7722 ||:  96%|#########6| 106/110 [01:12<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7718 ||:  97%|#########7| 107/110 [01:12<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7724 ||:  98%|#########8| 108/110 [01:13<00:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7720 ||:  99%|#########9| 109/110 [01:14<00:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7711 ||: 100%|##########| 110/110 [01:14<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7711 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3576, acc: 0.5000, no_result: 0.0938, loss: 0.7466 ||:   4%|4         | 1/24 [00:00<00:07,  2.93it/s]
+BLEU: 0.3743, acc: 0.5156, no_result: 0.0938, loss: 0.6954 ||:   8%|8         | 2/24 [00:00<00:07,  2.93it/s]
+BLEU: 0.3494, acc: 0.5104, no_result: 0.1042, loss: 0.7379 ||:  12%|#2        | 3/24 [00:01<00:06,  3.01it/s]
+BLEU: 0.3651, acc: 0.5234, no_result: 0.1094, loss: 0.7520 ||:  17%|#6        | 4/24 [00:01<00:06,  2.88it/s]
+BLEU: 0.4128, acc: 0.5687, no_result: 0.1062, loss: 0.7177 ||:  21%|##        | 5/24 [00:01<00:06,  2.83it/s]
+BLEU: 0.4174, acc: 0.5469, no_result: 0.1094, loss: 0.7339 ||:  25%|##5       | 6/24 [00:02<00:06,  2.94it/s]
+BLEU: 0.4004, acc: 0.5446, no_result: 0.1027, loss: 0.7360 ||:  29%|##9       | 7/24 [00:02<00:05,  3.06it/s]
+BLEU: 0.3787, acc: 0.5391, no_result: 0.1016, loss: 0.7417 ||:  33%|###3      | 8/24 [00:02<00:05,  3.10it/s]
+BLEU: 0.3871, acc: 0.5382, no_result: 0.1076, loss: 0.7372 ||:  38%|###7      | 9/24 [00:03<00:04,  3.04it/s]
+BLEU: 0.4084, acc: 0.5219, no_result: 0.1125, loss: 0.7421 ||:  42%|####1     | 10/24 [00:03<00:04,  2.95it/s]
+BLEU: 0.4277, acc: 0.5142, no_result: 0.1165, loss: 0.7513 ||:  46%|####5     | 11/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.4290, acc: 0.5339, no_result: 0.1094, loss: 0.7471 ||:  50%|#####     | 12/24 [00:04<00:04,  2.92it/s]
+BLEU: 0.4120, acc: 0.5168, no_result: 0.1154, loss: 0.7712 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.81it/s]
+BLEU: 0.4144, acc: 0.5156, no_result: 0.1116, loss: 0.7762 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.75it/s]
+BLEU: 0.3914, acc: 0.5021, no_result: 0.1333, loss: 0.8118 ||:  62%|######2   | 15/24 [00:05<00:03,  2.54it/s]
+BLEU: 0.3851, acc: 0.4902, no_result: 0.1367, loss: 0.8257 ||:  67%|######6   | 16/24 [00:05<00:03,  2.50it/s]
+BLEU: 0.3775, acc: 0.4835, no_result: 0.1452, loss: 0.8330 ||:  71%|#######   | 17/24 [00:06<00:02,  2.56it/s]
+BLEU: 0.3759, acc: 0.4792, no_result: 0.1458, loss: 0.8359 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.54it/s]
+BLEU: 0.3776, acc: 0.4786, no_result: 0.1431, loss: 0.8262 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.63it/s]
+BLEU: 0.3790, acc: 0.4781, no_result: 0.1391, loss: 0.8259 ||:  83%|########3 | 20/24 [00:07<00:01,  2.78it/s]
+BLEU: 0.3744, acc: 0.4688, no_result: 0.1369, loss: 0.8271 ||:  88%|########7 | 21/24 [00:07<00:01,  2.78it/s]
+BLEU: 0.3781, acc: 0.4602, no_result: 0.1364, loss: 0.8243 ||:  92%|#########1| 22/24 [00:07<00:00,  2.73it/s]
+BLEU: 0.3790, acc: 0.4688, no_result: 0.1359, loss: 0.8178 ||:  96%|#########5| 23/24 [00:08<00:00,  2.79it/s]
+BLEU: 0.3784, acc: 0.4585, no_result: 0.1348, loss: 0.8320 ||: 100%|##########| 24/24 [00:08<00:00,  3.49it/s]
+BLEU: 0.3784, acc: 0.4585, no_result: 0.1348, loss: 0.8320 ||: 100%|##########| 24/24 [00:08<00:00,  2.87it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6469 ||:   1%|          | 1/110 [00:00<01:08,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6632 ||:   2%|1         | 2/110 [00:01<01:07,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6707 ||:   3%|2         | 3/110 [00:01<01:06,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7087 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7154 ||:   5%|4         | 5/110 [00:03<01:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7111 ||:   5%|5         | 6/110 [00:03<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7264 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7392 ||:   7%|7         | 8/110 [00:05<01:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7326 ||:   8%|8         | 9/110 [00:05<01:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7268 ||:   9%|9         | 10/110 [00:06<01:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7234 ||:  10%|#         | 11/110 [00:07<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7083 ||:  11%|#         | 12/110 [00:07<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7160 ||:  12%|#1        | 13/110 [00:08<01:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7206 ||:  13%|#2        | 14/110 [00:08<01:00,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7171 ||:  14%|#3        | 15/110 [00:09<00:59,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7232 ||:  15%|#4        | 16/110 [00:10<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7214 ||:  15%|#5        | 17/110 [00:10<01:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7202 ||:  16%|#6        | 18/110 [00:11<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7156 ||:  17%|#7        | 19/110 [00:12<00:58,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7265 ||:  18%|#8        | 20/110 [00:13<01:22,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7270 ||:  19%|#9        | 21/110 [00:14<01:15,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7309 ||:  20%|##        | 22/110 [00:15<01:10,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7322 ||:  21%|##        | 23/110 [00:15<01:08,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7302 ||:  22%|##1       | 24/110 [00:16<01:04,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7258 ||:  23%|##2       | 25/110 [00:17<01:00,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7249 ||:  24%|##3       | 26/110 [00:17<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7204 ||:  25%|##4       | 27/110 [00:18<01:00,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7164 ||:  25%|##5       | 28/110 [00:19<00:57,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7135 ||:  26%|##6       | 29/110 [00:20<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7119 ||:  27%|##7       | 30/110 [00:20<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7119 ||:  28%|##8       | 31/110 [00:21<00:52,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7096 ||:  29%|##9       | 32/110 [00:21<00:50,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7098 ||:  30%|###       | 33/110 [00:22<00:50,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7138 ||:  31%|###       | 34/110 [00:23<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7128 ||:  32%|###1      | 35/110 [00:24<00:51,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7119 ||:  33%|###2      | 36/110 [00:24<00:49,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7151 ||:  34%|###3      | 37/110 [00:25<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7156 ||:  35%|###4      | 38/110 [00:26<00:48,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7205 ||:  35%|###5      | 39/110 [00:26<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7219 ||:  36%|###6      | 40/110 [00:27<00:45,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7205 ||:  37%|###7      | 41/110 [00:27<00:44,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7187 ||:  38%|###8      | 42/110 [00:28<00:43,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7165 ||:  39%|###9      | 43/110 [00:29<00:41,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7174 ||:  40%|####      | 44/110 [00:29<00:40,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7188 ||:  41%|####      | 45/110 [00:30<00:40,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7199 ||:  42%|####1     | 46/110 [00:31<00:40,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7204 ||:  43%|####2     | 47/110 [00:31<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7224 ||:  44%|####3     | 48/110 [00:32<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7226 ||:  45%|####4     | 49/110 [00:33<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7266 ||:  45%|####5     | 50/110 [00:33<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7247 ||:  46%|####6     | 51/110 [00:34<00:38,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7239 ||:  47%|####7     | 52/110 [00:35<00:37,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7227 ||:  48%|####8     | 53/110 [00:35<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7217 ||:  49%|####9     | 54/110 [00:36<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7237 ||:  50%|#####     | 55/110 [00:37<00:35,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7240 ||:  51%|#####     | 56/110 [00:37<00:35,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7262 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7266 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7273 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7267 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7271 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7278 ||:  56%|#####6    | 62/110 [00:41<00:31,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7294 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7291 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7298 ||:  59%|#####9    | 65/110 [00:43<00:29,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7294 ||:  60%|######    | 66/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7298 ||:  61%|######    | 67/110 [00:45<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7313 ||:  62%|######1   | 68/110 [00:45<00:27,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7323 ||:  63%|######2   | 69/110 [00:46<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7319 ||:  64%|######3   | 70/110 [00:47<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7306 ||:  65%|######4   | 71/110 [00:47<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7295 ||:  65%|######5   | 72/110 [00:48<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7312 ||:  66%|######6   | 73/110 [00:49<00:25,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7320 ||:  67%|######7   | 74/110 [00:49<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7314 ||:  68%|######8   | 75/110 [00:50<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7316 ||:  69%|######9   | 76/110 [00:51<00:22,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7317 ||:  70%|#######   | 77/110 [00:51<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7322 ||:  71%|#######   | 78/110 [00:52<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7323 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7313 ||:  73%|#######2  | 80/110 [00:53<00:19,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7299 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7302 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7303 ||:  75%|#######5  | 83/110 [00:55<00:18,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7300 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7285 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7289 ||:  78%|#######8  | 86/110 [00:57<00:15,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7293 ||:  79%|#######9  | 87/110 [00:58<00:14,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7314 ||:  80%|########  | 88/110 [00:59<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7327 ||:  81%|########  | 89/110 [00:59<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7327 ||:  82%|########1 | 90/110 [01:00<00:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7311 ||:  83%|########2 | 91/110 [01:01<00:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7310 ||:  84%|########3 | 92/110 [01:01<00:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7313 ||:  85%|########4 | 93/110 [01:02<00:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7330 ||:  85%|########5 | 94/110 [01:03<00:11,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7337 ||:  86%|########6 | 95/110 [01:04<00:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7340 ||:  87%|########7 | 96/110 [01:04<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7343 ||:  88%|########8 | 97/110 [01:05<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7355 ||:  89%|########9 | 98/110 [01:06<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7362 ||:  90%|######### | 99/110 [01:06<00:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7363 ||:  91%|######### | 100/110 [01:07<00:07,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7363 ||:  92%|#########1| 101/110 [01:08<00:06,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7368 ||:  93%|#########2| 102/110 [01:09<00:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7357 ||:  94%|#########3| 103/110 [01:09<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7363 ||:  95%|#########4| 104/110 [01:10<00:04,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7369 ||:  95%|#########5| 105/110 [01:11<00:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7365 ||:  96%|#########6| 106/110 [01:11<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7361 ||:  97%|#########7| 107/110 [01:12<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7360 ||:  98%|#########8| 108/110 [01:12<00:01,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7359 ||:  99%|#########9| 109/110 [01:13<00:00,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7373 ||: 100%|##########| 110/110 [01:13<00:00,  1.87it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7373 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3877, acc: 0.5938, no_result: 0.0312, loss: 0.6427 ||:   4%|4         | 1/24 [00:00<00:08,  2.80it/s]
+BLEU: 0.4100, acc: 0.5781, no_result: 0.0938, loss: 0.5965 ||:   8%|8         | 2/24 [00:00<00:07,  2.91it/s]
+BLEU: 0.3802, acc: 0.5729, no_result: 0.0938, loss: 0.6735 ||:  12%|#2        | 3/24 [00:01<00:06,  3.02it/s]
+BLEU: 0.3989, acc: 0.5547, no_result: 0.0938, loss: 0.7068 ||:  17%|#6        | 4/24 [00:01<00:06,  2.90it/s]
+BLEU: 0.4373, acc: 0.5875, no_result: 0.0938, loss: 0.6753 ||:  21%|##        | 5/24 [00:01<00:06,  2.87it/s]
+BLEU: 0.4360, acc: 0.5625, no_result: 0.0990, loss: 0.6984 ||:  25%|##5       | 6/24 [00:02<00:06,  3.00it/s]
+BLEU: 0.4203, acc: 0.5625, no_result: 0.0938, loss: 0.7009 ||:  29%|##9       | 7/24 [00:02<00:05,  3.10it/s]
+BLEU: 0.4064, acc: 0.5703, no_result: 0.0938, loss: 0.7088 ||:  33%|###3      | 8/24 [00:02<00:05,  3.12it/s]
+BLEU: 0.4145, acc: 0.5694, no_result: 0.0903, loss: 0.7096 ||:  38%|###7      | 9/24 [00:02<00:04,  3.10it/s]
+BLEU: 0.4194, acc: 0.5500, no_result: 0.0875, loss: 0.7170 ||:  42%|####1     | 10/24 [00:03<00:04,  3.02it/s]
+BLEU: 0.4202, acc: 0.5369, no_result: 0.0938, loss: 0.7287 ||:  46%|####5     | 11/24 [00:03<00:04,  2.88it/s]
+BLEU: 0.4262, acc: 0.5547, no_result: 0.0859, loss: 0.7235 ||:  50%|#####     | 12/24 [00:04<00:04,  2.96it/s]
+BLEU: 0.4106, acc: 0.5409, no_result: 0.0938, loss: 0.7500 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.84it/s]
+BLEU: 0.4178, acc: 0.5335, no_result: 0.0893, loss: 0.7545 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.79it/s]
+BLEU: 0.3913, acc: 0.5208, no_result: 0.1125, loss: 0.7854 ||:  62%|######2   | 15/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.3792, acc: 0.5000, no_result: 0.1191, loss: 0.7969 ||:  67%|######6   | 16/24 [00:05<00:03,  2.54it/s]
+BLEU: 0.3720, acc: 0.4945, no_result: 0.1268, loss: 0.8025 ||:  71%|#######   | 17/24 [00:06<00:02,  2.59it/s]
+BLEU: 0.3733, acc: 0.5017, no_result: 0.1250, loss: 0.8029 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.58it/s]
+BLEU: 0.3765, acc: 0.5033, no_result: 0.1201, loss: 0.7938 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.68it/s]
+BLEU: 0.3815, acc: 0.5016, no_result: 0.1156, loss: 0.7927 ||:  83%|########3 | 20/24 [00:07<00:01,  2.80it/s]
+BLEU: 0.3818, acc: 0.4940, no_result: 0.1131, loss: 0.7912 ||:  88%|########7 | 21/24 [00:07<00:01,  2.81it/s]
+BLEU: 0.3819, acc: 0.4901, no_result: 0.1136, loss: 0.7898 ||:  92%|#########1| 22/24 [00:07<00:00,  2.75it/s]
+BLEU: 0.3818, acc: 0.4946, no_result: 0.1114, loss: 0.7853 ||:  96%|#########5| 23/24 [00:08<00:00,  2.84it/s]
+BLEU: 0.3808, acc: 0.4832, no_result: 0.1114, loss: 0.7973 ||: 100%|##########| 24/24 [00:08<00:00,  3.53it/s]
+BLEU: 0.3808, acc: 0.4832, no_result: 0.1114, loss: 0.7973 ||: 100%|##########| 24/24 [00:08<00:00,  2.91it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6319 ||:   1%|          | 1/110 [00:00<01:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6254 ||:   2%|1         | 2/110 [00:01<01:08,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6303 ||:   3%|2         | 3/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6161 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6166 ||:   5%|4         | 5/110 [00:03<01:18,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6309 ||:   5%|5         | 6/110 [00:04<01:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6467 ||:   6%|6         | 7/110 [00:04<01:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6528 ||:   7%|7         | 8/110 [00:05<01:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6668 ||:   8%|8         | 9/110 [00:06<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6658 ||:   9%|9         | 10/110 [00:07<01:35,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6654 ||:  10%|#         | 11/110 [00:08<01:26,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6678 ||:  11%|#         | 12/110 [00:09<01:20,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6707 ||:  12%|#1        | 13/110 [00:09<01:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6774 ||:  13%|#2        | 14/110 [00:10<01:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6734 ||:  14%|#3        | 15/110 [00:11<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6707 ||:  15%|#4        | 16/110 [00:11<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6723 ||:  15%|#5        | 17/110 [00:12<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6761 ||:  16%|#6        | 18/110 [00:13<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6731 ||:  17%|#7        | 19/110 [00:13<00:58,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6709 ||:  18%|#8        | 20/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6714 ||:  19%|#9        | 21/110 [00:15<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6752 ||:  20%|##        | 22/110 [00:15<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6750 ||:  21%|##        | 23/110 [00:16<00:56,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6792 ||:  22%|##1       | 24/110 [00:16<00:54,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6874 ||:  23%|##2       | 25/110 [00:17<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6905 ||:  24%|##3       | 26/110 [00:18<01:00,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6913 ||:  25%|##4       | 27/110 [00:19<01:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6940 ||:  25%|##5       | 28/110 [00:19<00:58,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6931 ||:  26%|##6       | 29/110 [00:20<00:58,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6948 ||:  27%|##7       | 30/110 [00:21<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6940 ||:  28%|##8       | 31/110 [00:21<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6910 ||:  29%|##9       | 32/110 [00:22<00:52,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6915 ||:  30%|###       | 33/110 [00:23<00:52,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6956 ||:  31%|###       | 34/110 [00:23<00:50,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6970 ||:  32%|###1      | 35/110 [00:24<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6981 ||:  33%|###2      | 36/110 [00:25<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6972 ||:  34%|###3      | 37/110 [00:26<00:52,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6970 ||:  35%|###4      | 38/110 [00:26<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6958 ||:  35%|###5      | 39/110 [00:27<00:47,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6973 ||:  36%|###6      | 40/110 [00:28<00:45,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6980 ||:  37%|###7      | 41/110 [00:28<00:43,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6961 ||:  38%|###8      | 42/110 [00:29<00:41,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6967 ||:  39%|###9      | 43/110 [00:29<00:39,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6982 ||:  40%|####      | 44/110 [00:30<00:42,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6992 ||:  41%|####      | 45/110 [00:31<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6986 ||:  42%|####1     | 46/110 [00:31<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7001 ||:  43%|####2     | 47/110 [00:32<00:42,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7013 ||:  44%|####3     | 48/110 [00:33<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7019 ||:  45%|####4     | 49/110 [00:34<00:43,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7017 ||:  45%|####5     | 50/110 [00:34<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7032 ||:  46%|####6     | 51/110 [00:35<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7037 ||:  47%|####7     | 52/110 [00:35<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7027 ||:  48%|####8     | 53/110 [00:36<00:36,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7023 ||:  49%|####9     | 54/110 [00:37<00:36,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7037 ||:  50%|#####     | 55/110 [00:37<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7019 ||:  51%|#####     | 56/110 [00:38<00:34,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7017 ||:  52%|#####1    | 57/110 [00:39<00:33,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6980 ||:  53%|#####2    | 58/110 [00:39<00:32,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7001 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6997 ||:  55%|#####4    | 60/110 [00:41<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6991 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6989 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6978 ||:  57%|#####7    | 63/110 [00:43<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6967 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6983 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6984 ||:  60%|######    | 66/110 [00:45<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6985 ||:  61%|######    | 67/110 [00:46<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6989 ||:  62%|######1   | 68/110 [00:46<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7003 ||:  63%|######2   | 69/110 [00:47<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7000 ||:  64%|######3   | 70/110 [00:48<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7025 ||:  65%|######4   | 71/110 [00:48<00:25,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7007 ||:  65%|######5   | 72/110 [00:49<00:24,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7027 ||:  66%|######6   | 73/110 [00:49<00:23,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7035 ||:  67%|######7   | 74/110 [00:50<00:23,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7034 ||:  68%|######8   | 75/110 [00:51<00:21,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7048 ||:  69%|######9   | 76/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7052 ||:  70%|#######   | 77/110 [00:52<00:22,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7059 ||:  71%|#######   | 78/110 [00:53<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7074 ||:  72%|#######1  | 79/110 [00:53<00:19,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7081 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7087 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7108 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7110 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7119 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7150 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7150 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7158 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7155 ||:  80%|########  | 88/110 [00:59<00:14,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7154 ||:  81%|########  | 89/110 [01:00<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7157 ||:  82%|########1 | 90/110 [01:01<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7150 ||:  83%|########2 | 91/110 [01:01<00:12,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7150 ||:  84%|########3 | 92/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7149 ||:  85%|########4 | 93/110 [01:03<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7149 ||:  85%|########5 | 94/110 [01:03<00:10,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7149 ||:  86%|########6 | 95/110 [01:04<00:09,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7152 ||:  87%|########7 | 96/110 [01:05<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7148 ||:  88%|########8 | 97/110 [01:05<00:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7155 ||:  89%|########9 | 98/110 [01:06<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7161 ||:  90%|######### | 99/110 [01:07<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7159 ||:  91%|######### | 100/110 [01:07<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7166 ||:  92%|#########1| 101/110 [01:08<00:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7162 ||:  93%|#########2| 102/110 [01:09<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7166 ||:  94%|#########3| 103/110 [01:09<00:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7174 ||:  95%|#########4| 104/110 [01:10<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7173 ||:  95%|#########5| 105/110 [01:11<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7179 ||:  96%|#########6| 106/110 [01:11<00:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7181 ||:  97%|#########7| 107/110 [01:12<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7181 ||:  98%|#########8| 108/110 [01:12<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7191 ||:  99%|#########9| 109/110 [01:13<00:00,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7182 ||: 100%|##########| 110/110 [01:14<00:00,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.7182 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.4196, acc: 0.5312, no_result: 0.0312, loss: 0.6667 ||:   4%|4         | 1/24 [00:00<00:08,  2.76it/s]
+BLEU: 0.4234, acc: 0.5781, no_result: 0.0312, loss: 0.6381 ||:   8%|8         | 2/24 [00:00<00:07,  2.91it/s]
+BLEU: 0.3903, acc: 0.5729, no_result: 0.0417, loss: 0.7086 ||:  12%|#2        | 3/24 [00:01<00:07,  2.98it/s]
+BLEU: 0.3751, acc: 0.5234, no_result: 0.1016, loss: 0.7630 ||:  17%|#6        | 4/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.4220, acc: 0.5813, no_result: 0.0938, loss: 0.7225 ||:  21%|##        | 5/24 [00:01<00:06,  2.80it/s]
+BLEU: 0.4214, acc: 0.5417, no_result: 0.0885, loss: 0.7298 ||:  25%|##5       | 6/24 [00:02<00:06,  2.91it/s]
+BLEU: 0.4049, acc: 0.5446, no_result: 0.0804, loss: 0.7279 ||:  29%|##9       | 7/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.3928, acc: 0.5430, no_result: 0.0859, loss: 0.7329 ||:  33%|###3      | 8/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.4072, acc: 0.5417, no_result: 0.0833, loss: 0.7278 ||:  38%|###7      | 9/24 [00:03<00:05,  2.92it/s]
+BLEU: 0.4134, acc: 0.5250, no_result: 0.0781, loss: 0.7282 ||:  42%|####1     | 10/24 [00:03<00:04,  2.88it/s]
+BLEU: 0.4133, acc: 0.5085, no_result: 0.0852, loss: 0.7338 ||:  46%|####5     | 11/24 [00:03<00:04,  2.75it/s]
+BLEU: 0.4199, acc: 0.5286, no_result: 0.0781, loss: 0.7286 ||:  50%|#####     | 12/24 [00:04<00:04,  2.84it/s]
+BLEU: 0.4068, acc: 0.5168, no_result: 0.0841, loss: 0.7533 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.64it/s]
+BLEU: 0.4048, acc: 0.5112, no_result: 0.0826, loss: 0.7596 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.52it/s]
+BLEU: 0.3770, acc: 0.4917, no_result: 0.1062, loss: 0.7927 ||:  62%|######2   | 15/24 [00:05<00:03,  2.30it/s]
+BLEU: 0.3602, acc: 0.4727, no_result: 0.1113, loss: 0.8074 ||:  67%|######6   | 16/24 [00:06<00:03,  2.23it/s]
+BLEU: 0.3503, acc: 0.4706, no_result: 0.1195, loss: 0.8119 ||:  71%|#######   | 17/24 [00:06<00:03,  2.24it/s]
+BLEU: 0.3431, acc: 0.4705, no_result: 0.1215, loss: 0.8129 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.22it/s]
+BLEU: 0.3452, acc: 0.4720, no_result: 0.1168, loss: 0.8018 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.32it/s]
+BLEU: 0.3500, acc: 0.4781, no_result: 0.1125, loss: 0.8002 ||:  83%|########3 | 20/24 [00:07<00:01,  2.45it/s]
+BLEU: 0.3524, acc: 0.4747, no_result: 0.1101, loss: 0.7979 ||:  88%|########7 | 21/24 [00:08<00:01,  2.44it/s]
+BLEU: 0.3541, acc: 0.4631, no_result: 0.1136, loss: 0.7963 ||:  92%|#########1| 22/24 [00:08<00:00,  2.39it/s]
+BLEU: 0.3533, acc: 0.4701, no_result: 0.1155, loss: 0.7904 ||:  96%|#########5| 23/24 [00:08<00:00,  2.42it/s]
+BLEU: 0.3527, acc: 0.4644, no_result: 0.1153, loss: 0.8009 ||: 100%|##########| 24/24 [00:09<00:00,  2.98it/s]
+BLEU: 0.3527, acc: 0.4644, no_result: 0.1153, loss: 0.8009 ||: 100%|##########| 24/24 [00:09<00:00,  2.63it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6250 ||:   1%|          | 1/110 [00:00<01:19,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5491 ||:   2%|1         | 2/110 [00:01<01:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5954 ||:   3%|2         | 3/110 [00:02<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6219 ||:   4%|3         | 4/110 [00:02<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6309 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6385 ||:   5%|5         | 6/110 [00:04<01:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6511 ||:   6%|6         | 7/110 [00:04<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6399 ||:   7%|7         | 8/110 [00:05<01:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6376 ||:   8%|8         | 9/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6366 ||:   9%|9         | 10/110 [00:06<01:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6443 ||:  10%|#         | 11/110 [00:07<01:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6467 ||:  11%|#         | 12/110 [00:08<01:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6492 ||:  12%|#1        | 13/110 [00:09<01:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6569 ||:  13%|#2        | 14/110 [00:09<01:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6549 ||:  14%|#3        | 15/110 [00:10<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6484 ||:  15%|#4        | 16/110 [00:11<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6481 ||:  15%|#5        | 17/110 [00:11<01:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6464 ||:  16%|#6        | 18/110 [00:12<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6517 ||:  17%|#7        | 19/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6551 ||:  18%|#8        | 20/110 [00:14<01:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6556 ||:  19%|#9        | 21/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6567 ||:  20%|##        | 22/110 [00:15<01:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6582 ||:  21%|##        | 23/110 [00:16<01:06,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6608 ||:  22%|##1       | 24/110 [00:17<01:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6634 ||:  23%|##2       | 25/110 [00:17<01:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6663 ||:  24%|##3       | 26/110 [00:18<01:04,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6694 ||:  25%|##4       | 27/110 [00:19<01:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6713 ||:  25%|##5       | 28/110 [00:20<00:59,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6696 ||:  26%|##6       | 29/110 [00:20<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6729 ||:  27%|##7       | 30/110 [00:21<00:55,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6695 ||:  28%|##8       | 31/110 [00:22<00:54,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6673 ||:  29%|##9       | 32/110 [00:22<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6661 ||:  30%|###       | 33/110 [00:23<00:53,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6664 ||:  31%|###       | 34/110 [00:24<00:54,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6684 ||:  32%|###1      | 35/110 [00:25<00:57,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6690 ||:  33%|###2      | 36/110 [00:25<00:54,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6685 ||:  34%|###3      | 37/110 [00:26<00:54,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6715 ||:  35%|###4      | 38/110 [00:27<00:54,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6731 ||:  35%|###5      | 39/110 [00:28<00:57,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6739 ||:  36%|###6      | 40/110 [00:28<00:55,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6732 ||:  37%|###7      | 41/110 [00:29<00:52,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6739 ||:  38%|###8      | 42/110 [00:30<00:53,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6754 ||:  39%|###9      | 43/110 [00:31<00:55,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6735 ||:  40%|####      | 44/110 [00:32<00:51,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6733 ||:  41%|####      | 45/110 [00:32<00:49,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6759 ||:  42%|####1     | 46/110 [00:33<00:48,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6738 ||:  43%|####2     | 47/110 [00:34<00:48,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6728 ||:  44%|####3     | 48/110 [00:35<00:46,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6723 ||:  45%|####4     | 49/110 [00:35<00:43,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6748 ||:  45%|####5     | 50/110 [00:36<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6746 ||:  46%|####6     | 51/110 [00:37<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6755 ||:  47%|####7     | 52/110 [00:37<00:42,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6770 ||:  48%|####8     | 53/110 [00:38<00:40,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6784 ||:  49%|####9     | 54/110 [00:39<00:40,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6785 ||:  50%|#####     | 55/110 [00:39<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6774 ||:  51%|#####     | 56/110 [00:40<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6745 ||:  52%|#####1    | 57/110 [00:41<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6725 ||:  53%|#####2    | 58/110 [00:42<00:38,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6730 ||:  54%|#####3    | 59/110 [00:42<00:36,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6737 ||:  55%|#####4    | 60/110 [00:43<00:37,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6747 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6747 ||:  56%|#####6    | 62/110 [00:45<00:35,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6760 ||:  57%|#####7    | 63/110 [00:45<00:35,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6775 ||:  58%|#####8    | 64/110 [00:46<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6774 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6778 ||:  60%|######    | 66/110 [00:47<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6799 ||:  61%|######    | 67/110 [00:48<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6770 ||:  62%|######1   | 68/110 [00:49<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6756 ||:  63%|######2   | 69/110 [00:50<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6760 ||:  64%|######3   | 70/110 [00:50<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6770 ||:  65%|######4   | 71/110 [00:51<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6755 ||:  65%|######5   | 72/110 [00:52<00:26,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6758 ||:  66%|######6   | 73/110 [00:52<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6759 ||:  67%|######7   | 74/110 [00:53<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6753 ||:  68%|######8   | 75/110 [00:54<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6765 ||:  69%|######9   | 76/110 [00:54<00:24,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6777 ||:  70%|#######   | 77/110 [00:55<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6778 ||:  71%|#######   | 78/110 [00:56<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6770 ||:  72%|#######1  | 79/110 [00:57<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6773 ||:  73%|#######2  | 80/110 [00:57<00:20,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6764 ||:  74%|#######3  | 81/110 [00:58<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6772 ||:  75%|#######4  | 82/110 [00:59<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6785 ||:  75%|#######5  | 83/110 [01:00<00:20,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6803 ||:  76%|#######6  | 84/110 [01:00<00:20,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6799 ||:  77%|#######7  | 85/110 [01:01<00:18,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6788 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6790 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6794 ||:  80%|########  | 88/110 [01:03<00:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6805 ||:  81%|########  | 89/110 [01:04<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6816 ||:  82%|########1 | 90/110 [01:05<00:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6806 ||:  83%|########2 | 91/110 [01:05<00:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6788 ||:  84%|########3 | 92/110 [01:06<00:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6785 ||:  85%|########4 | 93/110 [01:07<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6782 ||:  85%|########5 | 94/110 [01:07<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6781 ||:  86%|########6 | 95/110 [01:08<00:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6770 ||:  87%|########7 | 96/110 [01:09<00:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6773 ||:  88%|########8 | 97/110 [01:09<00:09,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6780 ||:  89%|########9 | 98/110 [01:10<00:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6787 ||:  90%|######### | 99/110 [01:11<00:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6790 ||:  91%|######### | 100/110 [01:12<00:09,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6783 ||:  92%|#########1| 101/110 [01:13<00:07,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6781 ||:  93%|#########2| 102/110 [01:14<00:06,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6776 ||:  94%|#########3| 103/110 [01:14<00:05,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6772 ||:  95%|#########4| 104/110 [01:15<00:04,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6774 ||:  95%|#########5| 105/110 [01:16<00:03,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6775 ||:  96%|#########6| 106/110 [01:17<00:03,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6779 ||:  97%|#########7| 107/110 [01:17<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6781 ||:  98%|#########8| 108/110 [01:18<00:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6789 ||:  99%|#########9| 109/110 [01:19<00:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6801 ||: 100%|##########| 110/110 [01:19<00:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6801 ||: 100%|##########| 110/110 [01:19<00:00,  1.38it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.4404, acc: 0.5938, no_result: 0.0625, loss: 0.6631 ||:   4%|4         | 1/24 [00:00<00:09,  2.47it/s]
+BLEU: 0.4573, acc: 0.6406, no_result: 0.0625, loss: 0.6195 ||:   8%|8         | 2/24 [00:00<00:08,  2.47it/s]
+BLEU: 0.4171, acc: 0.6354, no_result: 0.0938, loss: 0.6988 ||:  12%|#2        | 3/24 [00:01<00:08,  2.51it/s]
+BLEU: 0.4158, acc: 0.6172, no_result: 0.0859, loss: 0.7134 ||:  17%|#6        | 4/24 [00:01<00:08,  2.44it/s]
+BLEU: 0.4661, acc: 0.6375, no_result: 0.0875, loss: 0.6861 ||:  21%|##        | 5/24 [00:02<00:07,  2.45it/s]
+BLEU: 0.4694, acc: 0.5885, no_result: 0.0833, loss: 0.7004 ||:  25%|##5       | 6/24 [00:02<00:07,  2.56it/s]
+BLEU: 0.4452, acc: 0.5893, no_result: 0.0804, loss: 0.6958 ||:  29%|##9       | 7/24 [00:02<00:06,  2.63it/s]
+BLEU: 0.4335, acc: 0.5977, no_result: 0.0820, loss: 0.6955 ||:  33%|###3      | 8/24 [00:03<00:06,  2.64it/s]
+BLEU: 0.4516, acc: 0.6007, no_result: 0.0799, loss: 0.6900 ||:  38%|###7      | 9/24 [00:03<00:05,  2.61it/s]
+BLEU: 0.4613, acc: 0.5813, no_result: 0.0844, loss: 0.6958 ||:  42%|####1     | 10/24 [00:03<00:05,  2.54it/s]
+BLEU: 0.4593, acc: 0.5682, no_result: 0.0881, loss: 0.7027 ||:  46%|####5     | 11/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.4623, acc: 0.5755, no_result: 0.0859, loss: 0.7009 ||:  50%|#####     | 12/24 [00:04<00:04,  2.47it/s]
+BLEU: 0.4513, acc: 0.5625, no_result: 0.0913, loss: 0.7258 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.36it/s]
+BLEU: 0.4543, acc: 0.5580, no_result: 0.0893, loss: 0.7374 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.31it/s]
+BLEU: 0.4329, acc: 0.5417, no_result: 0.1083, loss: 0.7680 ||:  62%|######2   | 15/24 [00:06<00:04,  2.18it/s]
+BLEU: 0.4240, acc: 0.5254, no_result: 0.1191, loss: 0.7827 ||:  67%|######6   | 16/24 [00:06<00:03,  2.13it/s]
+BLEU: 0.4235, acc: 0.5257, no_result: 0.1287, loss: 0.7883 ||:  71%|#######   | 17/24 [00:07<00:03,  2.16it/s]
+BLEU: 0.4225, acc: 0.5260, no_result: 0.1302, loss: 0.7934 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.15it/s]
+BLEU: 0.4249, acc: 0.5280, no_result: 0.1283, loss: 0.7845 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.22it/s]
+BLEU: 0.4296, acc: 0.5312, no_result: 0.1250, loss: 0.7833 ||:  83%|########3 | 20/24 [00:08<00:01,  2.35it/s]
+BLEU: 0.4278, acc: 0.5238, no_result: 0.1220, loss: 0.7819 ||:  88%|########7 | 21/24 [00:08<00:01,  2.34it/s]
+BLEU: 0.4321, acc: 0.5185, no_result: 0.1236, loss: 0.7792 ||:  92%|#########1| 22/24 [00:09<00:00,  2.31it/s]
+BLEU: 0.4307, acc: 0.5272, no_result: 0.1209, loss: 0.7722 ||:  96%|#########5| 23/24 [00:09<00:00,  2.41it/s]
+BLEU: 0.4308, acc: 0.5237, no_result: 0.1205, loss: 0.7903 ||: 100%|##########| 24/24 [00:09<00:00,  3.04it/s]
+BLEU: 0.4308, acc: 0.5237, no_result: 0.1205, loss: 0.7903 ||: 100%|##########| 24/24 [00:09<00:00,  2.45it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5424 ||:   1%|          | 1/110 [00:00<01:19,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5659 ||:   2%|1         | 2/110 [00:01<01:23,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5972 ||:   3%|2         | 3/110 [00:02<01:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6123 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6135 ||:   5%|4         | 5/110 [00:03<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6159 ||:   5%|5         | 6/110 [00:04<01:06,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6251 ||:   6%|6         | 7/110 [00:04<01:04,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6097 ||:   7%|7         | 8/110 [00:05<01:03,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6146 ||:   8%|8         | 9/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6196 ||:   9%|9         | 10/110 [00:06<01:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6262 ||:  10%|#         | 11/110 [00:07<01:12,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6290 ||:  11%|#         | 12/110 [00:08<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6304 ||:  12%|#1        | 13/110 [00:08<01:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6309 ||:  13%|#2        | 14/110 [00:09<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6345 ||:  14%|#3        | 15/110 [00:10<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6353 ||:  15%|#4        | 16/110 [00:11<01:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6365 ||:  15%|#5        | 17/110 [00:11<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6380 ||:  16%|#6        | 18/110 [00:12<01:07,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6409 ||:  17%|#7        | 19/110 [00:13<01:07,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6388 ||:  18%|#8        | 20/110 [00:14<01:10,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6390 ||:  19%|#9        | 21/110 [00:14<01:07,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6404 ||:  20%|##        | 22/110 [00:15<01:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6389 ||:  21%|##        | 23/110 [00:16<01:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6412 ||:  22%|##1       | 24/110 [00:17<01:03,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6407 ||:  23%|##2       | 25/110 [00:17<01:03,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6410 ||:  24%|##3       | 26/110 [00:18<01:04,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6377 ||:  25%|##4       | 27/110 [00:19<01:01,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6419 ||:  25%|##5       | 28/110 [00:20<01:00,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6410 ||:  26%|##6       | 29/110 [00:20<01:00,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6415 ||:  27%|##7       | 30/110 [00:21<01:04,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6411 ||:  28%|##8       | 31/110 [00:22<01:03,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6427 ||:  29%|##9       | 32/110 [00:23<01:00,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6418 ||:  30%|###       | 33/110 [00:23<00:58,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6368 ||:  31%|###       | 34/110 [00:24<00:55,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6347 ||:  32%|###1      | 35/110 [00:25<00:53,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6412 ||:  33%|###2      | 36/110 [00:26<00:52,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6425 ||:  34%|###3      | 37/110 [00:26<00:56,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6480 ||:  35%|###4      | 38/110 [00:27<00:55,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6474 ||:  35%|###5      | 39/110 [00:28<00:53,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6475 ||:  36%|###6      | 40/110 [00:29<00:52,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6479 ||:  37%|###7      | 41/110 [00:29<00:51,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6503 ||:  38%|###8      | 42/110 [00:30<00:49,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6484 ||:  39%|###9      | 43/110 [00:31<00:50,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6471 ||:  40%|####      | 44/110 [00:32<00:49,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6510 ||:  41%|####      | 45/110 [00:32<00:47,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6507 ||:  42%|####1     | 46/110 [00:33<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6526 ||:  43%|####2     | 47/110 [00:34<00:46,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6549 ||:  44%|####3     | 48/110 [00:34<00:44,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6553 ||:  45%|####4     | 49/110 [00:35<00:42,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6535 ||:  45%|####5     | 50/110 [00:36<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6550 ||:  46%|####6     | 51/110 [00:36<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6560 ||:  47%|####7     | 52/110 [00:37<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6556 ||:  48%|####8     | 53/110 [00:38<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6554 ||:  49%|####9     | 54/110 [00:39<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6544 ||:  50%|#####     | 55/110 [00:39<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6537 ||:  51%|#####     | 56/110 [00:40<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6541 ||:  52%|#####1    | 57/110 [00:41<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6535 ||:  53%|#####2    | 58/110 [00:41<00:36,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6526 ||:  54%|#####3    | 59/110 [00:42<00:38,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6528 ||:  55%|#####4    | 60/110 [00:43<00:36,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6536 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6546 ||:  56%|#####6    | 62/110 [00:44<00:36,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6547 ||:  57%|#####7    | 63/110 [00:45<00:34,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6542 ||:  58%|#####8    | 64/110 [00:46<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6536 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6537 ||:  60%|######    | 66/110 [00:47<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6540 ||:  61%|######    | 67/110 [00:48<00:29,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6541 ||:  62%|######1   | 68/110 [00:49<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6580 ||:  63%|######2   | 69/110 [00:49<00:28,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6596 ||:  64%|######3   | 70/110 [00:50<00:29,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6597 ||:  65%|######4   | 71/110 [00:51<00:28,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6585 ||:  65%|######5   | 72/110 [00:52<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6587 ||:  66%|######6   | 73/110 [00:52<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6584 ||:  67%|######7   | 74/110 [00:53<00:24,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6576 ||:  68%|######8   | 75/110 [00:54<00:24,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6581 ||:  69%|######9   | 76/110 [00:54<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6569 ||:  70%|#######   | 77/110 [00:55<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6578 ||:  71%|#######   | 78/110 [00:56<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6562 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6572 ||:  73%|#######2  | 80/110 [00:57<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6571 ||:  74%|#######3  | 81/110 [00:58<00:20,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6570 ||:  75%|#######4  | 82/110 [00:58<00:19,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6567 ||:  75%|#######5  | 83/110 [00:59<00:18,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6570 ||:  76%|#######6  | 84/110 [01:00<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6582 ||:  77%|#######7  | 85/110 [01:01<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6598 ||:  78%|#######8  | 86/110 [01:01<00:17,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6599 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6604 ||:  80%|########  | 88/110 [01:03<00:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6608 ||:  81%|########  | 89/110 [01:03<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6601 ||:  82%|########1 | 90/110 [01:05<00:19,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6590 ||:  83%|########2 | 91/110 [01:06<00:16,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6584 ||:  84%|########3 | 92/110 [01:06<00:14,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6589 ||:  85%|########4 | 93/110 [01:07<00:13,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6595 ||:  85%|########5 | 94/110 [01:08<00:12,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6588 ||:  86%|########6 | 95/110 [01:09<00:11,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6589 ||:  87%|########7 | 96/110 [01:09<00:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6583 ||:  88%|########8 | 97/110 [01:10<00:09,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6585 ||:  89%|########9 | 98/110 [01:11<00:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6585 ||:  90%|######### | 99/110 [01:11<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6587 ||:  91%|######### | 100/110 [01:12<00:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6598 ||:  92%|#########1| 101/110 [01:13<00:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6598 ||:  93%|#########2| 102/110 [01:13<00:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6582 ||:  94%|#########3| 103/110 [01:14<00:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6585 ||:  95%|#########4| 104/110 [01:15<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6577 ||:  95%|#########5| 105/110 [01:16<00:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6585 ||:  96%|#########6| 106/110 [01:16<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6586 ||:  97%|#########7| 107/110 [01:17<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6583 ||:  98%|#########8| 108/110 [01:18<00:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6568 ||:  99%|#########9| 109/110 [01:18<00:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6559 ||: 100%|##########| 110/110 [01:19<00:00,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6559 ||: 100%|##########| 110/110 [01:19<00:00,  1.39it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.3657, acc: 0.5000, no_result: 0.0938, loss: 0.6245 ||:   4%|4         | 1/24 [00:00<00:07,  2.89it/s]
+BLEU: 0.3642, acc: 0.5156, no_result: 0.1094, loss: 0.6239 ||:   8%|8         | 2/24 [00:00<00:07,  2.91it/s]
+BLEU: 0.3527, acc: 0.5312, no_result: 0.1146, loss: 0.6753 ||:  12%|#2        | 3/24 [00:01<00:07,  2.95it/s]
+BLEU: 0.4009, acc: 0.5391, no_result: 0.1016, loss: 0.6901 ||:  17%|#6        | 4/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.4620, acc: 0.5875, no_result: 0.0938, loss: 0.6613 ||:  21%|##        | 5/24 [00:01<00:06,  2.80it/s]
+BLEU: 0.4584, acc: 0.5677, no_result: 0.0885, loss: 0.6700 ||:  25%|##5       | 6/24 [00:02<00:06,  2.92it/s]
+BLEU: 0.4317, acc: 0.5759, no_result: 0.0759, loss: 0.6708 ||:  29%|##9       | 7/24 [00:02<00:05,  3.05it/s]
+BLEU: 0.4159, acc: 0.5820, no_result: 0.0781, loss: 0.6748 ||:  33%|###3      | 8/24 [00:02<00:05,  3.07it/s]
+BLEU: 0.4224, acc: 0.5833, no_result: 0.0799, loss: 0.6804 ||:  38%|###7      | 9/24 [00:03<00:04,  3.02it/s]
+BLEU: 0.4400, acc: 0.5781, no_result: 0.0750, loss: 0.6896 ||:  42%|####1     | 10/24 [00:03<00:04,  2.93it/s]
+BLEU: 0.4474, acc: 0.5653, no_result: 0.0795, loss: 0.6989 ||:  46%|####5     | 11/24 [00:03<00:04,  2.74it/s]
+BLEU: 0.4448, acc: 0.5677, no_result: 0.0781, loss: 0.6964 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.4360, acc: 0.5625, no_result: 0.0865, loss: 0.7193 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.4412, acc: 0.5469, no_result: 0.0848, loss: 0.7279 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.64it/s]
+BLEU: 0.4188, acc: 0.5333, no_result: 0.1042, loss: 0.7608 ||:  62%|######2   | 15/24 [00:05<00:03,  2.45it/s]
+BLEU: 0.4061, acc: 0.5117, no_result: 0.1152, loss: 0.7754 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.4006, acc: 0.5110, no_result: 0.1213, loss: 0.7751 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.4026, acc: 0.5122, no_result: 0.1233, loss: 0.7760 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.4060, acc: 0.5148, no_result: 0.1217, loss: 0.7650 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.4085, acc: 0.5172, no_result: 0.1172, loss: 0.7621 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.4056, acc: 0.5074, no_result: 0.1146, loss: 0.7634 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.4100, acc: 0.4986, no_result: 0.1179, loss: 0.7640 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.4089, acc: 0.5054, no_result: 0.1182, loss: 0.7564 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.4078, acc: 0.4983, no_result: 0.1179, loss: 0.7638 ||: 100%|##########| 24/24 [00:08<00:00,  3.39it/s]
+BLEU: 0.4078, acc: 0.4983, no_result: 0.1179, loss: 0.7638 ||: 100%|##########| 24/24 [00:08<00:00,  2.80it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5901 ||:   1%|          | 1/110 [00:00<01:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5878 ||:   2%|1         | 2/110 [00:01<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6092 ||:   3%|2         | 3/110 [00:02<01:20,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6181 ||:   4%|3         | 4/110 [00:02<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6239 ||:   5%|4         | 5/110 [00:03<01:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6079 ||:   5%|5         | 6/110 [00:04<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6266 ||:   6%|6         | 7/110 [00:04<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6149 ||:   7%|7         | 8/110 [00:05<01:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6225 ||:   8%|8         | 9/110 [00:06<01:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6103 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6038 ||:  10%|#         | 11/110 [00:07<01:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6182 ||:  11%|#         | 12/110 [00:08<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6190 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6235 ||:  13%|#2        | 14/110 [00:09<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6170 ||:  14%|#3        | 15/110 [00:10<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6203 ||:  15%|#4        | 16/110 [00:11<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6179 ||:  15%|#5        | 17/110 [00:11<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6147 ||:  16%|#6        | 18/110 [00:12<01:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6233 ||:  17%|#7        | 19/110 [00:13<01:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6219 ||:  18%|#8        | 20/110 [00:14<01:06,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6264 ||:  19%|#9        | 21/110 [00:14<01:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6248 ||:  20%|##        | 22/110 [00:15<01:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6239 ||:  21%|##        | 23/110 [00:16<01:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6238 ||:  22%|##1       | 24/110 [00:16<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6209 ||:  23%|##2       | 25/110 [00:17<01:00,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6205 ||:  24%|##3       | 26/110 [00:18<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6223 ||:  25%|##4       | 27/110 [00:19<01:04,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6212 ||:  25%|##5       | 28/110 [00:20<01:03,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6221 ||:  26%|##6       | 29/110 [00:20<01:02,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6209 ||:  27%|##7       | 30/110 [00:21<00:58,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6185 ||:  28%|##8       | 31/110 [00:22<00:58,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6232 ||:  29%|##9       | 32/110 [00:22<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6231 ||:  30%|###       | 33/110 [00:23<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6228 ||:  31%|###       | 34/110 [00:24<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6213 ||:  32%|###1      | 35/110 [00:24<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6200 ||:  33%|###2      | 36/110 [00:25<00:53,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6206 ||:  34%|###3      | 37/110 [00:26<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6194 ||:  35%|###4      | 38/110 [00:26<00:47,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6138 ||:  35%|###5      | 39/110 [00:27<00:46,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6127 ||:  36%|###6      | 40/110 [00:28<00:47,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6166 ||:  37%|###7      | 41/110 [00:29<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6194 ||:  38%|###8      | 42/110 [00:29<00:46,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6157 ||:  39%|###9      | 43/110 [00:30<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6136 ||:  40%|####      | 44/110 [00:30<00:42,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6163 ||:  41%|####      | 45/110 [00:31<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6160 ||:  42%|####1     | 46/110 [00:32<00:41,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6144 ||:  43%|####2     | 47/110 [00:33<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6148 ||:  44%|####3     | 48/110 [00:33<00:45,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6162 ||:  45%|####4     | 49/110 [00:34<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6151 ||:  45%|####5     | 50/110 [00:35<00:43,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6135 ||:  46%|####6     | 51/110 [00:35<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6155 ||:  47%|####7     | 52/110 [00:36<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6148 ||:  48%|####8     | 53/110 [00:37<00:39,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6146 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6161 ||:  50%|#####     | 55/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6158 ||:  51%|#####     | 56/110 [00:39<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6123 ||:  52%|#####1    | 57/110 [00:40<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6139 ||:  53%|#####2    | 58/110 [00:40<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6140 ||:  54%|#####3    | 59/110 [00:41<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6153 ||:  55%|#####4    | 60/110 [00:42<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6151 ||:  55%|#####5    | 61/110 [00:42<00:33,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6158 ||:  56%|#####6    | 62/110 [00:43<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6170 ||:  57%|#####7    | 63/110 [00:44<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6177 ||:  58%|#####8    | 64/110 [00:44<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6173 ||:  59%|#####9    | 65/110 [00:45<00:32,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6202 ||:  60%|######    | 66/110 [00:46<00:32,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6212 ||:  61%|######    | 67/110 [00:47<00:32,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6211 ||:  62%|######1   | 68/110 [00:47<00:30,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6218 ||:  63%|######2   | 69/110 [00:48<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6210 ||:  64%|######3   | 70/110 [00:49<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6209 ||:  65%|######4   | 71/110 [00:50<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6202 ||:  65%|######5   | 72/110 [00:50<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6205 ||:  66%|######6   | 73/110 [00:51<00:26,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6207 ||:  67%|######7   | 74/110 [00:52<00:27,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6203 ||:  68%|######8   | 75/110 [00:53<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6205 ||:  69%|######9   | 76/110 [00:53<00:26,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6194 ||:  70%|#######   | 77/110 [00:54<00:25,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6196 ||:  71%|#######   | 78/110 [00:55<00:23,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6202 ||:  72%|#######1  | 79/110 [00:55<00:22,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6201 ||:  73%|#######2  | 80/110 [00:57<00:29,  1.01it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6201 ||:  74%|#######3  | 81/110 [00:58<00:28,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6204 ||:  75%|#######4  | 82/110 [00:59<00:24,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6216 ||:  75%|#######5  | 83/110 [00:59<00:22,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6219 ||:  76%|#######6  | 84/110 [01:00<00:21,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6235 ||:  77%|#######7  | 85/110 [01:01<00:19,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6239 ||:  78%|#######8  | 86/110 [01:02<00:19,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6240 ||:  79%|#######9  | 87/110 [01:02<00:17,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6255 ||:  80%|########  | 88/110 [01:03<00:16,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6244 ||:  81%|########  | 89/110 [01:04<00:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6248 ||:  82%|########1 | 90/110 [01:05<00:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6247 ||:  83%|########2 | 91/110 [01:05<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6240 ||:  84%|########3 | 92/110 [01:06<00:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6241 ||:  85%|########4 | 93/110 [01:07<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6244 ||:  85%|########5 | 94/110 [01:07<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6243 ||:  86%|########6 | 95/110 [01:08<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6248 ||:  87%|########7 | 96/110 [01:09<00:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6252 ||:  88%|########8 | 97/110 [01:09<00:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6265 ||:  89%|########9 | 98/110 [01:10<00:08,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6277 ||:  90%|######### | 99/110 [01:11<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6293 ||:  91%|######### | 100/110 [01:12<00:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6292 ||:  92%|#########1| 101/110 [01:12<00:06,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6297 ||:  93%|#########2| 102/110 [01:13<00:05,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6297 ||:  94%|#########3| 103/110 [01:14<00:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6314 ||:  95%|#########4| 104/110 [01:14<00:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6307 ||:  95%|#########5| 105/110 [01:15<00:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6309 ||:  96%|#########6| 106/110 [01:16<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6317 ||:  97%|#########7| 107/110 [01:16<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6316 ||:  98%|#########8| 108/110 [01:17<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6318 ||:  99%|#########9| 109/110 [01:18<00:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6310 ||: 100%|##########| 110/110 [01:18<00:00,  1.67it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6310 ||: 100%|##########| 110/110 [01:18<00:00,  1.40it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.4243, acc: 0.5938, no_result: 0.0625, loss: 0.6347 ||:   4%|4         | 1/24 [00:00<00:08,  2.83it/s]
+BLEU: 0.4112, acc: 0.5625, no_result: 0.0625, loss: 0.6098 ||:   8%|8         | 2/24 [00:00<00:07,  2.87it/s]
+BLEU: 0.3965, acc: 0.5625, no_result: 0.1042, loss: 0.6644 ||:  12%|#2        | 3/24 [00:01<00:07,  2.91it/s]
+BLEU: 0.4362, acc: 0.5703, no_result: 0.0938, loss: 0.6714 ||:  17%|#6        | 4/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.4843, acc: 0.6000, no_result: 0.0938, loss: 0.6570 ||:  21%|##        | 5/24 [00:01<00:06,  2.79it/s]
+BLEU: 0.4846, acc: 0.5625, no_result: 0.0885, loss: 0.6739 ||:  25%|##5       | 6/24 [00:02<00:06,  2.93it/s]
+BLEU: 0.4611, acc: 0.5759, no_result: 0.0804, loss: 0.6700 ||:  29%|##9       | 7/24 [00:02<00:05,  3.03it/s]
+BLEU: 0.4433, acc: 0.5781, no_result: 0.0859, loss: 0.6713 ||:  33%|###3      | 8/24 [00:02<00:05,  3.05it/s]
+BLEU: 0.4490, acc: 0.5833, no_result: 0.0833, loss: 0.6716 ||:  38%|###7      | 9/24 [00:03<00:05,  2.98it/s]
+BLEU: 0.4670, acc: 0.5781, no_result: 0.0844, loss: 0.6774 ||:  42%|####1     | 10/24 [00:03<00:04,  2.89it/s]
+BLEU: 0.4725, acc: 0.5597, no_result: 0.0852, loss: 0.6928 ||:  46%|####5     | 11/24 [00:03<00:04,  2.77it/s]
+BLEU: 0.4719, acc: 0.5625, no_result: 0.0807, loss: 0.6918 ||:  50%|#####     | 12/24 [00:04<00:04,  2.87it/s]
+BLEU: 0.4645, acc: 0.5529, no_result: 0.0938, loss: 0.7138 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.75it/s]
+BLEU: 0.4726, acc: 0.5446, no_result: 0.0938, loss: 0.7223 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.69it/s]
+BLEU: 0.4482, acc: 0.5312, no_result: 0.1042, loss: 0.7544 ||:  62%|######2   | 15/24 [00:05<00:03,  2.52it/s]
+BLEU: 0.4369, acc: 0.5137, no_result: 0.1113, loss: 0.7730 ||:  67%|######6   | 16/24 [00:05<00:03,  2.46it/s]
+BLEU: 0.4306, acc: 0.5147, no_result: 0.1232, loss: 0.7768 ||:  71%|#######   | 17/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.4270, acc: 0.5156, no_result: 0.1233, loss: 0.7804 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.48it/s]
+BLEU: 0.4320, acc: 0.5197, no_result: 0.1201, loss: 0.7675 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.49it/s]
+BLEU: 0.4332, acc: 0.5219, no_result: 0.1156, loss: 0.7658 ||:  83%|########3 | 20/24 [00:07<00:01,  2.55it/s]
+BLEU: 0.4295, acc: 0.5149, no_result: 0.1146, loss: 0.7668 ||:  88%|########7 | 21/24 [00:07<00:01,  2.50it/s]
+BLEU: 0.4312, acc: 0.5099, no_result: 0.1151, loss: 0.7640 ||:  92%|#########1| 22/24 [00:08<00:00,  2.41it/s]
+BLEU: 0.4327, acc: 0.5190, no_result: 0.1128, loss: 0.7536 ||:  96%|#########5| 23/24 [00:08<00:00,  2.48it/s]
+BLEU: 0.4313, acc: 0.5113, no_result: 0.1127, loss: 0.7668 ||: 100%|##########| 24/24 [00:08<00:00,  3.03it/s]
+BLEU: 0.4313, acc: 0.5113, no_result: 0.1127, loss: 0.7668 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6454 ||:   1%|          | 1/110 [00:00<01:20,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6099 ||:   2%|1         | 2/110 [00:01<01:25,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5866 ||:   3%|2         | 3/110 [00:02<01:25,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5943 ||:   4%|3         | 4/110 [00:03<01:21,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5962 ||:   5%|4         | 5/110 [00:03<01:20,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6085 ||:   5%|5         | 6/110 [00:04<01:20,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6165 ||:   6%|6         | 7/110 [00:05<01:17,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6334 ||:   7%|7         | 8/110 [00:06<01:23,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6211 ||:   8%|8         | 9/110 [00:06<01:16,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6249 ||:   9%|9         | 10/110 [00:07<01:20,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6164 ||:  10%|#         | 11/110 [00:08<01:18,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6149 ||:  11%|#         | 12/110 [00:09<01:19,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6096 ||:  12%|#1        | 13/110 [00:10<01:17,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6009 ||:  13%|#2        | 14/110 [00:10<01:13,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6041 ||:  14%|#3        | 15/110 [00:11<01:09,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5989 ||:  15%|#4        | 16/110 [00:12<01:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5984 ||:  15%|#5        | 17/110 [00:12<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6017 ||:  16%|#6        | 18/110 [00:13<01:06,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6038 ||:  17%|#7        | 19/110 [00:14<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6028 ||:  18%|#8        | 20/110 [00:15<01:04,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5976 ||:  19%|#9        | 21/110 [00:15<01:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6002 ||:  20%|##        | 22/110 [00:16<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5990 ||:  21%|##        | 23/110 [00:17<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5986 ||:  22%|##1       | 24/110 [00:18<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5962 ||:  23%|##2       | 25/110 [00:18<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5959 ||:  24%|##3       | 26/110 [00:19<00:57,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5996 ||:  25%|##4       | 27/110 [00:20<00:58,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6014 ||:  25%|##5       | 28/110 [00:20<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5986 ||:  26%|##6       | 29/110 [00:21<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5989 ||:  27%|##7       | 30/110 [00:22<00:56,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5930 ||:  28%|##8       | 31/110 [00:22<00:54,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5932 ||:  29%|##9       | 32/110 [00:23<00:55,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5916 ||:  30%|###       | 33/110 [00:24<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5937 ||:  31%|###       | 34/110 [00:24<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5962 ||:  32%|###1      | 35/110 [00:25<00:50,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5941 ||:  33%|###2      | 36/110 [00:26<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5900 ||:  34%|###3      | 37/110 [00:27<00:51,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5904 ||:  35%|###4      | 38/110 [00:27<00:50,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5877 ||:  35%|###5      | 39/110 [00:28<00:50,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5858 ||:  36%|###6      | 40/110 [00:29<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5872 ||:  37%|###7      | 41/110 [00:29<00:46,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5884 ||:  38%|###8      | 42/110 [00:30<00:47,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5840 ||:  39%|###9      | 43/110 [00:31<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5834 ||:  40%|####      | 44/110 [00:31<00:43,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5840 ||:  41%|####      | 45/110 [00:32<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5827 ||:  42%|####1     | 46/110 [00:33<00:42,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5843 ||:  43%|####2     | 47/110 [00:33<00:41,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5824 ||:  44%|####3     | 48/110 [00:34<00:39,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5846 ||:  45%|####4     | 49/110 [00:34<00:40,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5836 ||:  45%|####5     | 50/110 [00:35<00:41,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5850 ||:  46%|####6     | 51/110 [00:36<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5853 ||:  47%|####7     | 52/110 [00:37<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5854 ||:  48%|####8     | 53/110 [00:37<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5865 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5855 ||:  50%|#####     | 55/110 [00:39<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5852 ||:  51%|#####     | 56/110 [00:39<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5846 ||:  52%|#####1    | 57/110 [00:40<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5856 ||:  53%|#####2    | 58/110 [00:41<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5854 ||:  54%|#####3    | 59/110 [00:41<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5854 ||:  55%|#####4    | 60/110 [00:42<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5849 ||:  55%|#####5    | 61/110 [00:43<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5844 ||:  56%|#####6    | 62/110 [00:43<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5824 ||:  57%|#####7    | 63/110 [00:44<00:30,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5816 ||:  58%|#####8    | 64/110 [00:45<00:29,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5806 ||:  59%|#####9    | 65/110 [00:45<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5814 ||:  60%|######    | 66/110 [00:46<00:29,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5811 ||:  61%|######    | 67/110 [00:47<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5825 ||:  62%|######1   | 68/110 [00:47<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5815 ||:  63%|######2   | 69/110 [00:48<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5823 ||:  64%|######3   | 70/110 [00:50<00:39,  1.01it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5841 ||:  65%|######4   | 71/110 [00:50<00:35,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5857 ||:  65%|######5   | 72/110 [00:51<00:33,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5853 ||:  66%|######6   | 73/110 [00:52<00:30,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5838 ||:  67%|######7   | 74/110 [00:53<00:27,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5847 ||:  68%|######8   | 75/110 [00:53<00:26,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5851 ||:  69%|######9   | 76/110 [00:54<00:25,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5856 ||:  70%|#######   | 77/110 [00:55<00:24,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5865 ||:  71%|#######   | 78/110 [00:56<00:24,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5862 ||:  72%|#######1  | 79/110 [00:56<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5852 ||:  73%|#######2  | 80/110 [00:57<00:21,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5859 ||:  74%|#######3  | 81/110 [00:58<00:21,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5864 ||:  75%|#######4  | 82/110 [00:59<00:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5870 ||:  75%|#######5  | 83/110 [00:59<00:20,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5873 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5879 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5866 ||:  78%|#######8  | 86/110 [01:01<00:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5884 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5884 ||:  80%|########  | 88/110 [01:03<00:17,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5889 ||:  81%|########  | 89/110 [01:04<00:16,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5877 ||:  82%|########1 | 90/110 [01:05<00:15,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5872 ||:  83%|########2 | 91/110 [01:05<00:14,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5879 ||:  84%|########3 | 92/110 [01:06<00:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5880 ||:  85%|########4 | 93/110 [01:07<00:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5897 ||:  85%|########5 | 94/110 [01:07<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5901 ||:  86%|########6 | 95/110 [01:08<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5905 ||:  87%|########7 | 96/110 [01:09<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5909 ||:  88%|########8 | 97/110 [01:09<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5902 ||:  89%|########9 | 98/110 [01:10<00:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5913 ||:  90%|######### | 99/110 [01:11<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5908 ||:  91%|######### | 100/110 [01:11<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5913 ||:  92%|#########1| 101/110 [01:12<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5916 ||:  93%|#########2| 102/110 [01:13<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5918 ||:  94%|#########3| 103/110 [01:13<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5916 ||:  95%|#########4| 104/110 [01:14<00:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5921 ||:  95%|#########5| 105/110 [01:15<00:03,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5916 ||:  96%|#########6| 106/110 [01:16<00:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5916 ||:  97%|#########7| 107/110 [01:17<00:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5920 ||:  98%|#########8| 108/110 [01:17<00:01,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5929 ||:  99%|#########9| 109/110 [01:18<00:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5909 ||: 100%|##########| 110/110 [01:18<00:00,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5909 ||: 100%|##########| 110/110 [01:18<00:00,  1.39it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5934, acc: 0.6562, no_result: 0.0625, loss: 0.6378 ||:   4%|4         | 1/24 [00:00<00:08,  2.84it/s]
+BLEU: 0.5456, acc: 0.6875, no_result: 0.0625, loss: 0.5958 ||:   8%|8         | 2/24 [00:00<00:07,  2.92it/s]
+BLEU: 0.5087, acc: 0.6354, no_result: 0.1042, loss: 0.6685 ||:  12%|#2        | 3/24 [00:01<00:07,  2.94it/s]
+BLEU: 0.5213, acc: 0.6328, no_result: 0.0938, loss: 0.6773 ||:  17%|#6        | 4/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.5588, acc: 0.6562, no_result: 0.0875, loss: 0.6469 ||:  21%|##        | 5/24 [00:01<00:06,  2.80it/s]
+BLEU: 0.5539, acc: 0.6302, no_result: 0.0729, loss: 0.6623 ||:  25%|##5       | 6/24 [00:02<00:06,  2.95it/s]
+BLEU: 0.5390, acc: 0.6250, no_result: 0.0670, loss: 0.6584 ||:  29%|##9       | 7/24 [00:02<00:05,  3.06it/s]
+BLEU: 0.5358, acc: 0.6367, no_result: 0.0664, loss: 0.6585 ||:  33%|###3      | 8/24 [00:02<00:05,  3.08it/s]
+BLEU: 0.5311, acc: 0.6285, no_result: 0.0660, loss: 0.6584 ||:  38%|###7      | 9/24 [00:03<00:04,  3.04it/s]
+BLEU: 0.5459, acc: 0.6188, no_result: 0.0625, loss: 0.6609 ||:  42%|####1     | 10/24 [00:03<00:04,  2.96it/s]
+BLEU: 0.5478, acc: 0.6051, no_result: 0.0682, loss: 0.6761 ||:  46%|####5     | 11/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.5470, acc: 0.6198, no_result: 0.0625, loss: 0.6745 ||:  50%|#####     | 12/24 [00:04<00:04,  2.92it/s]
+BLEU: 0.5369, acc: 0.6082, no_result: 0.0721, loss: 0.7010 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.80it/s]
+BLEU: 0.5426, acc: 0.5982, no_result: 0.0692, loss: 0.7103 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.74it/s]
+BLEU: 0.5118, acc: 0.5813, no_result: 0.0833, loss: 0.7431 ||:  62%|######2   | 15/24 [00:05<00:03,  2.55it/s]
+BLEU: 0.4946, acc: 0.5625, no_result: 0.0918, loss: 0.7574 ||:  67%|######6   | 16/24 [00:05<00:03,  2.48it/s]
+BLEU: 0.4914, acc: 0.5607, no_result: 0.0993, loss: 0.7600 ||:  71%|#######   | 17/24 [00:06<00:02,  2.51it/s]
+BLEU: 0.4953, acc: 0.5590, no_result: 0.1042, loss: 0.7661 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.4994, acc: 0.5658, no_result: 0.1020, loss: 0.7529 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.57it/s]
+BLEU: 0.5014, acc: 0.5672, no_result: 0.1016, loss: 0.7528 ||:  83%|########3 | 20/24 [00:07<00:01,  2.70it/s]
+BLEU: 0.5009, acc: 0.5625, no_result: 0.0982, loss: 0.7517 ||:  88%|########7 | 21/24 [00:07<00:01,  2.72it/s]
+BLEU: 0.5061, acc: 0.5582, no_result: 0.0966, loss: 0.7494 ||:  92%|#########1| 22/24 [00:07<00:00,  2.70it/s]
+BLEU: 0.5049, acc: 0.5652, no_result: 0.0951, loss: 0.7403 ||:  96%|#########5| 23/24 [00:08<00:00,  2.78it/s]
+BLEU: 0.5038, acc: 0.5602, no_result: 0.0958, loss: 0.7520 ||: 100%|##########| 24/24 [00:08<00:00,  3.44it/s]
+BLEU: 0.5038, acc: 0.5602, no_result: 0.0958, loss: 0.7520 ||: 100%|##########| 24/24 [00:08<00:00,  2.85it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6467 ||:   1%|          | 1/110 [00:00<01:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.6029 ||:   2%|1         | 2/110 [00:01<01:23,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5824 ||:   3%|2         | 3/110 [00:02<01:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5741 ||:   4%|3         | 4/110 [00:02<01:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5691 ||:   5%|4         | 5/110 [00:03<01:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5518 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5596 ||:   6%|6         | 7/110 [00:04<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5511 ||:   7%|7         | 8/110 [00:05<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5464 ||:   8%|8         | 9/110 [00:06<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5495 ||:   9%|9         | 10/110 [00:07<01:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5542 ||:  10%|#         | 11/110 [00:07<01:14,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5527 ||:  11%|#         | 12/110 [00:08<01:15,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5620 ||:  12%|#1        | 13/110 [00:09<01:14,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5557 ||:  13%|#2        | 14/110 [00:10<01:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5532 ||:  14%|#3        | 15/110 [00:10<01:08,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5583 ||:  15%|#4        | 16/110 [00:11<01:13,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5579 ||:  15%|#5        | 17/110 [00:12<01:09,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5592 ||:  16%|#6        | 18/110 [00:13<01:07,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5538 ||:  17%|#7        | 19/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5560 ||:  18%|#8        | 20/110 [00:14<01:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5500 ||:  19%|#9        | 21/110 [00:15<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5528 ||:  20%|##        | 22/110 [00:15<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5569 ||:  21%|##        | 23/110 [00:16<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5588 ||:  22%|##1       | 24/110 [00:17<01:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5616 ||:  23%|##2       | 25/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5639 ||:  24%|##3       | 26/110 [00:18<00:57,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5673 ||:  25%|##4       | 27/110 [00:19<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5695 ||:  25%|##5       | 28/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5675 ||:  26%|##6       | 29/110 [00:20<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5678 ||:  27%|##7       | 30/110 [00:21<00:56,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5661 ||:  28%|##8       | 31/110 [00:22<00:55,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5635 ||:  29%|##9       | 32/110 [00:22<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5628 ||:  30%|###       | 33/110 [00:23<00:53,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5619 ||:  31%|###       | 34/110 [00:24<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5638 ||:  32%|###1      | 35/110 [00:24<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5628 ||:  33%|###2      | 36/110 [00:25<00:52,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5660 ||:  34%|###3      | 37/110 [00:26<00:51,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5652 ||:  35%|###4      | 38/110 [00:27<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5652 ||:  35%|###5      | 39/110 [00:27<00:55,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5672 ||:  36%|###6      | 40/110 [00:28<00:53,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5667 ||:  37%|###7      | 41/110 [00:29<00:50,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5694 ||:  38%|###8      | 42/110 [00:30<00:49,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5721 ||:  39%|###9      | 43/110 [00:30<00:47,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5727 ||:  40%|####      | 44/110 [00:31<00:48,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5736 ||:  41%|####      | 45/110 [00:32<00:47,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5739 ||:  42%|####1     | 46/110 [00:32<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5741 ||:  43%|####2     | 47/110 [00:33<00:43,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5738 ||:  44%|####3     | 48/110 [00:34<00:43,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5732 ||:  45%|####4     | 49/110 [00:34<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5736 ||:  45%|####5     | 50/110 [00:35<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5751 ||:  46%|####6     | 51/110 [00:36<00:44,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5734 ||:  47%|####7     | 52/110 [00:37<00:42,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5743 ||:  48%|####8     | 53/110 [00:37<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5736 ||:  49%|####9     | 54/110 [00:38<00:40,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5720 ||:  50%|#####     | 55/110 [00:39<00:42,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5703 ||:  51%|#####     | 56/110 [00:40<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5722 ||:  52%|#####1    | 57/110 [00:40<00:38,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5715 ||:  53%|#####2    | 58/110 [00:41<00:37,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5714 ||:  54%|#####3    | 59/110 [00:42<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5701 ||:  55%|#####4    | 60/110 [00:43<00:48,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5716 ||:  55%|#####5    | 61/110 [00:44<00:43,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5725 ||:  56%|#####6    | 62/110 [00:45<00:39,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5729 ||:  57%|#####7    | 63/110 [00:45<00:36,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5718 ||:  58%|#####8    | 64/110 [00:46<00:34,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5720 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5709 ||:  60%|######    | 66/110 [00:47<00:31,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5696 ||:  61%|######    | 67/110 [00:48<00:31,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5703 ||:  62%|######1   | 68/110 [00:49<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5726 ||:  63%|######2   | 69/110 [00:50<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5709 ||:  64%|######3   | 70/110 [00:50<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5696 ||:  65%|######4   | 71/110 [00:51<00:27,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5695 ||:  65%|######5   | 72/110 [00:52<00:28,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5691 ||:  66%|######6   | 73/110 [00:52<00:26,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5695 ||:  67%|######7   | 74/110 [00:53<00:26,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5682 ||:  68%|######8   | 75/110 [00:54<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5675 ||:  69%|######9   | 76/110 [00:55<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5685 ||:  70%|#######   | 77/110 [00:55<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5687 ||:  71%|#######   | 78/110 [00:56<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5683 ||:  72%|#######1  | 79/110 [00:57<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5669 ||:  73%|#######2  | 80/110 [00:57<00:20,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5662 ||:  74%|#######3  | 81/110 [00:58<00:20,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5669 ||:  75%|#######4  | 82/110 [00:59<00:19,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5669 ||:  75%|#######5  | 83/110 [00:59<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5664 ||:  76%|#######6  | 84/110 [01:00<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5655 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5652 ||:  78%|#######8  | 86/110 [01:01<00:16,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5654 ||:  79%|#######9  | 87/110 [01:02<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5647 ||:  80%|########  | 88/110 [01:03<00:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5633 ||:  81%|########  | 89/110 [01:04<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5622 ||:  82%|########1 | 90/110 [01:04<00:14,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5641 ||:  83%|########2 | 91/110 [01:05<00:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5649 ||:  84%|########3 | 92/110 [01:06<00:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5648 ||:  85%|########4 | 93/110 [01:06<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5650 ||:  85%|########5 | 94/110 [01:07<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5645 ||:  86%|########6 | 95/110 [01:08<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5639 ||:  87%|########7 | 96/110 [01:08<00:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5640 ||:  88%|########8 | 97/110 [01:09<00:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5637 ||:  89%|########9 | 98/110 [01:10<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5647 ||:  90%|######### | 99/110 [01:10<00:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5645 ||:  91%|######### | 100/110 [01:11<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5643 ||:  92%|#########1| 101/110 [01:12<00:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5640 ||:  93%|#########2| 102/110 [01:13<00:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5644 ||:  94%|#########3| 103/110 [01:13<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5645 ||:  95%|#########4| 104/110 [01:14<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5636 ||:  95%|#########5| 105/110 [01:15<00:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5630 ||:  96%|#########6| 106/110 [01:15<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5625 ||:  97%|#########7| 107/110 [01:16<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5624 ||:  98%|#########8| 108/110 [01:17<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5647 ||:  99%|#########9| 109/110 [01:17<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5660 ||: 100%|##########| 110/110 [01:18<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5660 ||: 100%|##########| 110/110 [01:18<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5504, acc: 0.6250, no_result: 0.0625, loss: 0.6163 ||:   4%|4         | 1/24 [00:00<00:08,  2.85it/s]
+BLEU: 0.5363, acc: 0.6562, no_result: 0.0469, loss: 0.5835 ||:   8%|8         | 2/24 [00:00<00:07,  2.95it/s]
+BLEU: 0.5188, acc: 0.6042, no_result: 0.0729, loss: 0.6603 ||:  12%|#2        | 3/24 [00:01<00:07,  2.96it/s]
+BLEU: 0.5445, acc: 0.6172, no_result: 0.0625, loss: 0.6596 ||:  17%|#6        | 4/24 [00:01<00:06,  2.86it/s]
+BLEU: 0.5813, acc: 0.6562, no_result: 0.0625, loss: 0.6351 ||:  21%|##        | 5/24 [00:01<00:06,  2.82it/s]
+BLEU: 0.5917, acc: 0.6458, no_result: 0.0625, loss: 0.6497 ||:  25%|##5       | 6/24 [00:02<00:06,  2.95it/s]
+BLEU: 0.5914, acc: 0.6518, no_result: 0.0625, loss: 0.6337 ||:  29%|##9       | 7/24 [00:02<00:05,  3.06it/s]
+BLEU: 0.5899, acc: 0.6484, no_result: 0.0625, loss: 0.6414 ||:  33%|###3      | 8/24 [00:02<00:05,  3.07it/s]
+BLEU: 0.5779, acc: 0.6528, no_result: 0.0625, loss: 0.6406 ||:  38%|###7      | 9/24 [00:03<00:04,  3.04it/s]
+BLEU: 0.5795, acc: 0.6406, no_result: 0.0594, loss: 0.6446 ||:  42%|####1     | 10/24 [00:03<00:04,  2.96it/s]
+BLEU: 0.5814, acc: 0.6278, no_result: 0.0653, loss: 0.6568 ||:  46%|####5     | 11/24 [00:03<00:04,  2.82it/s]
+BLEU: 0.5800, acc: 0.6380, no_result: 0.0599, loss: 0.6596 ||:  50%|#####     | 12/24 [00:04<00:04,  2.91it/s]
+BLEU: 0.5702, acc: 0.6226, no_result: 0.0673, loss: 0.6826 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.78it/s]
+BLEU: 0.5768, acc: 0.6116, no_result: 0.0670, loss: 0.6989 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.72it/s]
+BLEU: 0.5469, acc: 0.5958, no_result: 0.0875, loss: 0.7373 ||:  62%|######2   | 15/24 [00:05<00:03,  2.52it/s]
+BLEU: 0.5322, acc: 0.5762, no_result: 0.0977, loss: 0.7556 ||:  67%|######6   | 16/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.5279, acc: 0.5772, no_result: 0.1011, loss: 0.7559 ||:  71%|#######   | 17/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.5279, acc: 0.5781, no_result: 0.1076, loss: 0.7585 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.5340, acc: 0.5855, no_result: 0.1053, loss: 0.7421 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.57it/s]
+BLEU: 0.5356, acc: 0.5891, no_result: 0.1000, loss: 0.7386 ||:  83%|########3 | 20/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.5373, acc: 0.5818, no_result: 0.0982, loss: 0.7383 ||:  88%|########7 | 21/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.5410, acc: 0.5767, no_result: 0.0980, loss: 0.7374 ||:  92%|#########1| 22/24 [00:07<00:00,  2.69it/s]
+BLEU: 0.5439, acc: 0.5842, no_result: 0.0951, loss: 0.7269 ||:  96%|#########5| 23/24 [00:08<00:00,  2.79it/s]
+BLEU: 0.5428, acc: 0.5784, no_result: 0.0958, loss: 0.7350 ||: 100%|##########| 24/24 [00:08<00:00,  3.46it/s]
+BLEU: 0.5428, acc: 0.5784, no_result: 0.0958, loss: 0.7350 ||: 100%|##########| 24/24 [00:08<00:00,  2.84it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5329 ||:   1%|          | 1/110 [00:00<01:46,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4631 ||:   2%|1         | 2/110 [00:01<01:22,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4943 ||:   3%|2         | 3/110 [00:02<01:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4646 ||:   4%|3         | 4/110 [00:03<01:20,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4679 ||:   5%|4         | 5/110 [00:03<01:20,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4818 ||:   5%|5         | 6/110 [00:04<01:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4978 ||:   6%|6         | 7/110 [00:05<01:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5059 ||:   7%|7         | 8/110 [00:06<01:15,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5003 ||:   8%|8         | 9/110 [00:06<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4982 ||:   9%|9         | 10/110 [00:07<01:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5006 ||:  10%|#         | 11/110 [00:08<01:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4972 ||:  11%|#         | 12/110 [00:08<01:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5000 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5024 ||:  13%|#2        | 14/110 [00:10<01:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5085 ||:  14%|#3        | 15/110 [00:11<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5065 ||:  15%|#4        | 16/110 [00:11<01:10,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5092 ||:  15%|#5        | 17/110 [00:12<01:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5177 ||:  16%|#6        | 18/110 [00:13<01:09,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5118 ||:  17%|#7        | 19/110 [00:14<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5138 ||:  18%|#8        | 20/110 [00:14<01:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5103 ||:  19%|#9        | 21/110 [00:15<01:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5122 ||:  20%|##        | 22/110 [00:16<01:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5129 ||:  21%|##        | 23/110 [00:16<01:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5127 ||:  22%|##1       | 24/110 [00:17<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5098 ||:  23%|##2       | 25/110 [00:18<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5167 ||:  24%|##3       | 26/110 [00:18<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5194 ||:  25%|##4       | 27/110 [00:19<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5197 ||:  25%|##5       | 28/110 [00:20<00:55,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5203 ||:  26%|##6       | 29/110 [00:20<00:56,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5230 ||:  27%|##7       | 30/110 [00:21<00:59,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5200 ||:  28%|##8       | 31/110 [00:22<00:56,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5191 ||:  29%|##9       | 32/110 [00:23<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5205 ||:  30%|###       | 33/110 [00:23<00:53,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5207 ||:  31%|###       | 34/110 [00:24<00:51,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5168 ||:  32%|###1      | 35/110 [00:25<00:49,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5190 ||:  33%|###2      | 36/110 [00:25<00:48,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5192 ||:  34%|###3      | 37/110 [00:26<00:46,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5200 ||:  35%|###4      | 38/110 [00:26<00:47,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5183 ||:  35%|###5      | 39/110 [00:27<00:46,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5182 ||:  36%|###6      | 40/110 [00:28<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5187 ||:  37%|###7      | 41/110 [00:29<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5217 ||:  38%|###8      | 42/110 [00:29<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5220 ||:  39%|###9      | 43/110 [00:30<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5228 ||:  40%|####      | 44/110 [00:30<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5242 ||:  41%|####      | 45/110 [00:31<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5246 ||:  42%|####1     | 46/110 [00:32<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5244 ||:  43%|####2     | 47/110 [00:33<00:42,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5238 ||:  44%|####3     | 48/110 [00:33<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5236 ||:  45%|####4     | 49/110 [00:34<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5215 ||:  45%|####5     | 50/110 [00:36<01:00,  1.01s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.5196 ||:  46%|####6     | 51/110 [00:36<00:53,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5199 ||:  47%|####7     | 52/110 [00:37<00:48,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5190 ||:  48%|####8     | 53/110 [00:38<00:44,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5203 ||:  49%|####9     | 54/110 [00:38<00:42,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5237 ||:  50%|#####     | 55/110 [00:39<00:42,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5260 ||:  51%|#####     | 56/110 [00:40<00:41,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5258 ||:  52%|#####1    | 57/110 [00:41<00:40,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5249 ||:  53%|#####2    | 58/110 [00:41<00:39,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5237 ||:  54%|#####3    | 59/110 [00:42<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5242 ||:  55%|#####4    | 60/110 [00:43<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5259 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5237 ||:  56%|#####6    | 62/110 [00:44<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5239 ||:  57%|#####7    | 63/110 [00:45<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5241 ||:  58%|#####8    | 64/110 [00:46<00:33,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5243 ||:  59%|#####9    | 65/110 [00:46<00:31,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5275 ||:  60%|######    | 66/110 [00:47<00:30,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5272 ||:  61%|######    | 67/110 [00:48<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5263 ||:  62%|######1   | 68/110 [00:48<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5273 ||:  63%|######2   | 69/110 [00:49<00:28,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5279 ||:  64%|######3   | 70/110 [00:50<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5281 ||:  65%|######4   | 71/110 [00:50<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5293 ||:  65%|######5   | 72/110 [00:51<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5292 ||:  66%|######6   | 73/110 [00:52<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5292 ||:  67%|######7   | 74/110 [00:52<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5292 ||:  68%|######8   | 75/110 [00:53<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5299 ||:  69%|######9   | 76/110 [00:54<00:25,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5296 ||:  70%|#######   | 77/110 [00:55<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5300 ||:  71%|#######   | 78/110 [00:55<00:23,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5294 ||:  72%|#######1  | 79/110 [00:56<00:23,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5290 ||:  73%|#######2  | 80/110 [00:57<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5275 ||:  74%|#######3  | 81/110 [00:58<00:19,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5266 ||:  75%|#######4  | 82/110 [00:58<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5268 ||:  75%|#######5  | 83/110 [00:59<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5285 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5289 ||:  77%|#######7  | 85/110 [01:00<00:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5301 ||:  78%|#######8  | 86/110 [01:01<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5296 ||:  79%|#######9  | 87/110 [01:02<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5306 ||:  80%|########  | 88/110 [01:02<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5310 ||:  81%|########  | 89/110 [01:03<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5317 ||:  82%|########1 | 90/110 [01:04<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5309 ||:  83%|########2 | 91/110 [01:04<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5306 ||:  84%|########3 | 92/110 [01:05<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5297 ||:  85%|########4 | 93/110 [01:06<00:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5298 ||:  85%|########5 | 94/110 [01:07<00:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5297 ||:  86%|########6 | 95/110 [01:07<00:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5292 ||:  87%|########7 | 96/110 [01:08<00:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5311 ||:  88%|########8 | 97/110 [01:09<00:09,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5323 ||:  89%|########9 | 98/110 [01:09<00:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5319 ||:  90%|######### | 99/110 [01:10<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5318 ||:  91%|######### | 100/110 [01:11<00:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5310 ||:  92%|#########1| 101/110 [01:12<00:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5307 ||:  93%|#########2| 102/110 [01:12<00:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5308 ||:  94%|#########3| 103/110 [01:13<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5308 ||:  95%|#########4| 104/110 [01:14<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5298 ||:  95%|#########5| 105/110 [01:14<00:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5294 ||:  96%|#########6| 106/110 [01:15<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5309 ||:  97%|#########7| 107/110 [01:16<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5309 ||:  98%|#########8| 108/110 [01:16<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5315 ||:  99%|#########9| 109/110 [01:17<00:00,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5331 ||: 100%|##########| 110/110 [01:18<00:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5331 ||: 100%|##########| 110/110 [01:18<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5077, acc: 0.6562, no_result: 0.0625, loss: 0.5818 ||:   4%|4         | 1/24 [00:00<00:07,  2.94it/s]
+BLEU: 0.4950, acc: 0.6562, no_result: 0.0625, loss: 0.5620 ||:   8%|8         | 2/24 [00:00<00:07,  2.94it/s]
+BLEU: 0.4962, acc: 0.6562, no_result: 0.0938, loss: 0.6375 ||:  12%|#2        | 3/24 [00:01<00:07,  2.99it/s]
+BLEU: 0.5157, acc: 0.6406, no_result: 0.0859, loss: 0.6422 ||:  17%|#6        | 4/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.5633, acc: 0.6625, no_result: 0.0813, loss: 0.6095 ||:  21%|##        | 5/24 [00:01<00:06,  2.83it/s]
+BLEU: 0.5584, acc: 0.6198, no_result: 0.0833, loss: 0.6268 ||:  25%|##5       | 6/24 [00:02<00:06,  2.95it/s]
+BLEU: 0.5531, acc: 0.6339, no_result: 0.0714, loss: 0.6181 ||:  29%|##9       | 7/24 [00:02<00:05,  3.03it/s]
+BLEU: 0.5407, acc: 0.6328, no_result: 0.0742, loss: 0.6234 ||:  33%|###3      | 8/24 [00:02<00:05,  3.03it/s]
+BLEU: 0.5364, acc: 0.6389, no_result: 0.0729, loss: 0.6195 ||:  38%|###7      | 9/24 [00:03<00:05,  2.95it/s]
+BLEU: 0.5423, acc: 0.6219, no_result: 0.0719, loss: 0.6249 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.5387, acc: 0.6023, no_result: 0.0739, loss: 0.6443 ||:  46%|####5     | 11/24 [00:03<00:04,  2.72it/s]
+BLEU: 0.5385, acc: 0.6120, no_result: 0.0703, loss: 0.6446 ||:  50%|#####     | 12/24 [00:04<00:04,  2.78it/s]
+BLEU: 0.5314, acc: 0.6010, no_result: 0.0817, loss: 0.6724 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.66it/s]
+BLEU: 0.5328, acc: 0.5871, no_result: 0.0781, loss: 0.6898 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.61it/s]
+BLEU: 0.5062, acc: 0.5667, no_result: 0.0979, loss: 0.7265 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.4888, acc: 0.5488, no_result: 0.1094, loss: 0.7427 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.4903, acc: 0.5478, no_result: 0.1140, loss: 0.7439 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.4909, acc: 0.5486, no_result: 0.1128, loss: 0.7485 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.5012, acc: 0.5559, no_result: 0.1118, loss: 0.7325 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.5036, acc: 0.5594, no_result: 0.1094, loss: 0.7298 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.5008, acc: 0.5506, no_result: 0.1071, loss: 0.7288 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.5050, acc: 0.5483, no_result: 0.1051, loss: 0.7277 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.5089, acc: 0.5584, no_result: 0.1019, loss: 0.7166 ||:  96%|#########5| 23/24 [00:08<00:00,  2.70it/s]
+BLEU: 0.5072, acc: 0.5537, no_result: 0.1023, loss: 0.7253 ||: 100%|##########| 24/24 [00:08<00:00,  3.28it/s]
+BLEU: 0.5072, acc: 0.5537, no_result: 0.1023, loss: 0.7253 ||: 100%|##########| 24/24 [00:08<00:00,  2.77it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4345 ||:   1%|          | 1/110 [00:00<01:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5059 ||:   2%|1         | 2/110 [00:01<01:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5173 ||:   3%|2         | 3/110 [00:02<01:21,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5153 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5200 ||:   5%|4         | 5/110 [00:03<01:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5134 ||:   5%|5         | 6/110 [00:04<01:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5159 ||:   6%|6         | 7/110 [00:05<01:16,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5074 ||:   7%|7         | 8/110 [00:05<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5026 ||:   8%|8         | 9/110 [00:06<01:11,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5156 ||:   9%|9         | 10/110 [00:07<01:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5193 ||:  10%|#         | 11/110 [00:07<01:12,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5197 ||:  11%|#         | 12/110 [00:08<01:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5304 ||:  12%|#1        | 13/110 [00:09<01:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5208 ||:  13%|#2        | 14/110 [00:10<01:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5208 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5202 ||:  15%|#4        | 16/110 [00:11<01:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5199 ||:  15%|#5        | 17/110 [00:12<01:11,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5170 ||:  16%|#6        | 18/110 [00:13<01:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5156 ||:  17%|#7        | 19/110 [00:13<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5111 ||:  18%|#8        | 20/110 [00:14<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5078 ||:  19%|#9        | 21/110 [00:15<01:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5015 ||:  20%|##        | 22/110 [00:15<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5025 ||:  21%|##        | 23/110 [00:16<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5042 ||:  22%|##1       | 24/110 [00:17<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5009 ||:  23%|##2       | 25/110 [00:17<00:57,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4991 ||:  24%|##3       | 26/110 [00:18<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5026 ||:  25%|##4       | 27/110 [00:19<01:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4994 ||:  25%|##5       | 28/110 [00:20<00:59,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5037 ||:  26%|##6       | 29/110 [00:20<00:57,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5053 ||:  27%|##7       | 30/110 [00:21<00:57,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5023 ||:  28%|##8       | 31/110 [00:22<00:55,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5006 ||:  29%|##9       | 32/110 [00:22<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4988 ||:  30%|###       | 33/110 [00:23<00:58,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5027 ||:  31%|###       | 34/110 [00:24<00:56,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5024 ||:  32%|###1      | 35/110 [00:25<00:56,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5031 ||:  33%|###2      | 36/110 [00:26<00:56,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5051 ||:  34%|###3      | 37/110 [00:26<00:54,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5084 ||:  35%|###4      | 38/110 [00:27<00:56,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5076 ||:  35%|###5      | 39/110 [00:28<00:56,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5053 ||:  36%|###6      | 40/110 [00:29<01:11,  1.02s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.5076 ||:  37%|###7      | 41/110 [00:30<01:03,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5088 ||:  38%|###8      | 42/110 [00:31<00:58,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5108 ||:  39%|###9      | 43/110 [00:32<00:54,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5104 ||:  40%|####      | 44/110 [00:32<00:51,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5096 ||:  41%|####      | 45/110 [00:33<00:47,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5135 ||:  42%|####1     | 46/110 [00:34<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5140 ||:  43%|####2     | 47/110 [00:34<00:45,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5131 ||:  44%|####3     | 48/110 [00:35<00:44,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5111 ||:  45%|####4     | 49/110 [00:36<00:42,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5118 ||:  45%|####5     | 50/110 [00:36<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5092 ||:  46%|####6     | 51/110 [00:37<00:43,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5102 ||:  47%|####7     | 52/110 [00:38<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5102 ||:  48%|####8     | 53/110 [00:38<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5104 ||:  49%|####9     | 54/110 [00:39<00:39,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5099 ||:  50%|#####     | 55/110 [00:40<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5116 ||:  51%|#####     | 56/110 [00:41<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5125 ||:  52%|#####1    | 57/110 [00:41<00:38,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5150 ||:  53%|#####2    | 58/110 [00:42<00:39,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5136 ||:  54%|#####3    | 59/110 [00:43<00:36,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5126 ||:  55%|#####4    | 60/110 [00:44<00:35,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5118 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  56%|#####6    | 62/110 [00:45<00:35,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5132 ||:  57%|#####7    | 63/110 [00:46<00:34,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5139 ||:  58%|#####8    | 64/110 [00:47<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5164 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5158 ||:  60%|######    | 66/110 [00:48<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5144 ||:  61%|######    | 67/110 [00:49<00:32,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5140 ||:  62%|######1   | 68/110 [00:50<00:30,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5133 ||:  63%|######2   | 69/110 [00:50<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5130 ||:  64%|######3   | 70/110 [00:51<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  65%|######4   | 71/110 [00:52<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5119 ||:  65%|######5   | 72/110 [00:52<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  66%|######6   | 73/110 [00:53<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5112 ||:  67%|######7   | 74/110 [00:54<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  68%|######8   | 75/110 [00:54<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5124 ||:  69%|######9   | 76/110 [00:55<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5123 ||:  70%|#######   | 77/110 [00:56<00:22,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5104 ||:  71%|#######   | 78/110 [00:56<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  72%|#######1  | 79/110 [00:57<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5121 ||:  73%|#######2  | 80/110 [00:58<00:21,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5105 ||:  74%|#######3  | 81/110 [00:59<00:20,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5094 ||:  75%|#######4  | 82/110 [00:59<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5105 ||:  75%|#######5  | 83/110 [01:00<00:18,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5108 ||:  76%|#######6  | 84/110 [01:01<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5108 ||:  77%|#######7  | 85/110 [01:01<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5109 ||:  78%|#######8  | 86/110 [01:02<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5123 ||:  79%|#######9  | 87/110 [01:03<00:15,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  80%|########  | 88/110 [01:03<00:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5120 ||:  81%|########  | 89/110 [01:04<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5117 ||:  82%|########1 | 90/110 [01:05<00:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5110 ||:  83%|########2 | 91/110 [01:06<00:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5106 ||:  84%|########3 | 92/110 [01:06<00:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5105 ||:  85%|########4 | 93/110 [01:07<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5114 ||:  85%|########5 | 94/110 [01:08<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5118 ||:  86%|########6 | 95/110 [01:08<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5110 ||:  87%|########7 | 96/110 [01:09<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5098 ||:  88%|########8 | 97/110 [01:10<00:09,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5093 ||:  89%|########9 | 98/110 [01:10<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5095 ||:  90%|######### | 99/110 [01:11<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5077 ||:  91%|######### | 100/110 [01:12<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5079 ||:  92%|#########1| 101/110 [01:12<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5092 ||:  93%|#########2| 102/110 [01:13<00:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5099 ||:  94%|#########3| 103/110 [01:14<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5104 ||:  95%|#########4| 104/110 [01:14<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5095 ||:  95%|#########5| 105/110 [01:15<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5085 ||:  96%|#########6| 106/110 [01:16<00:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5095 ||:  97%|#########7| 107/110 [01:16<00:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5099 ||:  98%|#########8| 108/110 [01:17<00:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5088 ||:  99%|#########9| 109/110 [01:18<00:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5094 ||: 100%|##########| 110/110 [01:18<00:00,  1.79it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5094 ||: 100%|##########| 110/110 [01:18<00:00,  1.40it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5406, acc: 0.5938, no_result: 0.0000, loss: 0.5879 ||:   4%|4         | 1/24 [00:00<00:07,  2.88it/s]
+BLEU: 0.4424, acc: 0.5625, no_result: 0.0625, loss: 0.6031 ||:   8%|8         | 2/24 [00:00<00:07,  2.88it/s]
+BLEU: 0.4488, acc: 0.5833, no_result: 0.1042, loss: 0.6645 ||:  12%|#2        | 3/24 [00:01<00:07,  2.94it/s]
+BLEU: 0.5126, acc: 0.6016, no_result: 0.0859, loss: 0.6630 ||:  17%|#6        | 4/24 [00:01<00:06,  2.86it/s]
+BLEU: 0.5565, acc: 0.6312, no_result: 0.0813, loss: 0.6231 ||:  21%|##        | 5/24 [00:01<00:06,  2.85it/s]
+BLEU: 0.5490, acc: 0.6146, no_result: 0.0729, loss: 0.6306 ||:  25%|##5       | 6/24 [00:02<00:06,  2.98it/s]
+BLEU: 0.5379, acc: 0.6071, no_result: 0.0714, loss: 0.6251 ||:  29%|##9       | 7/24 [00:02<00:05,  3.09it/s]
+BLEU: 0.5311, acc: 0.6172, no_result: 0.0703, loss: 0.6309 ||:  33%|###3      | 8/24 [00:02<00:05,  3.11it/s]
+BLEU: 0.5271, acc: 0.6215, no_result: 0.0694, loss: 0.6341 ||:  38%|###7      | 9/24 [00:03<00:04,  3.05it/s]
+BLEU: 0.5445, acc: 0.6156, no_result: 0.0688, loss: 0.6442 ||:  42%|####1     | 10/24 [00:03<00:04,  2.97it/s]
+BLEU: 0.5437, acc: 0.6023, no_result: 0.0767, loss: 0.6651 ||:  46%|####5     | 11/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.5382, acc: 0.6068, no_result: 0.0755, loss: 0.6647 ||:  50%|#####     | 12/24 [00:04<00:04,  2.92it/s]
+BLEU: 0.5273, acc: 0.5986, no_result: 0.0913, loss: 0.6874 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.76it/s]
+BLEU: 0.5292, acc: 0.5826, no_result: 0.0960, loss: 0.7031 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.67it/s]
+BLEU: 0.5160, acc: 0.5708, no_result: 0.1062, loss: 0.7369 ||:  62%|######2   | 15/24 [00:05<00:03,  2.53it/s]
+BLEU: 0.5028, acc: 0.5547, no_result: 0.1152, loss: 0.7518 ||:  67%|######6   | 16/24 [00:05<00:03,  2.46it/s]
+BLEU: 0.5041, acc: 0.5588, no_result: 0.1195, loss: 0.7515 ||:  71%|#######   | 17/24 [00:06<00:02,  2.53it/s]
+BLEU: 0.5053, acc: 0.5573, no_result: 0.1233, loss: 0.7559 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.51it/s]
+BLEU: 0.5106, acc: 0.5674, no_result: 0.1201, loss: 0.7388 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.60it/s]
+BLEU: 0.5111, acc: 0.5672, no_result: 0.1172, loss: 0.7386 ||:  83%|########3 | 20/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.5032, acc: 0.5565, no_result: 0.1131, loss: 0.7414 ||:  88%|########7 | 21/24 [00:07<00:01,  2.74it/s]
+BLEU: 0.5085, acc: 0.5497, no_result: 0.1165, loss: 0.7396 ||:  92%|#########1| 22/24 [00:07<00:00,  2.68it/s]
+BLEU: 0.5118, acc: 0.5584, no_result: 0.1141, loss: 0.7289 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.5109, acc: 0.5537, no_result: 0.1094, loss: 0.7373 ||: 100%|##########| 24/24 [00:08<00:00,  3.43it/s]
+BLEU: 0.5109, acc: 0.5537, no_result: 0.1094, loss: 0.7373 ||: 100%|##########| 24/24 [00:08<00:00,  2.85it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5175 ||:   1%|          | 1/110 [00:00<01:32,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4758 ||:   2%|1         | 2/110 [00:01<01:22,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4468 ||:   3%|2         | 3/110 [00:02<01:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4338 ||:   4%|3         | 4/110 [00:02<01:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4659 ||:   5%|4         | 5/110 [00:03<01:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4650 ||:   5%|5         | 6/110 [00:04<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4639 ||:   6%|6         | 7/110 [00:05<01:13,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4641 ||:   7%|7         | 8/110 [00:05<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4711 ||:   8%|8         | 9/110 [00:06<01:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4683 ||:   9%|9         | 10/110 [00:06<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4699 ||:  10%|#         | 11/110 [00:07<01:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4760 ||:  11%|#         | 12/110 [00:08<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4796 ||:  12%|#1        | 13/110 [00:09<01:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4783 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4799 ||:  14%|#3        | 15/110 [00:10<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4791 ||:  15%|#4        | 16/110 [00:10<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4728 ||:  15%|#5        | 17/110 [00:11<01:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4803 ||:  16%|#6        | 18/110 [00:12<01:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4755 ||:  17%|#7        | 19/110 [00:12<00:58,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4744 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4736 ||:  19%|#9        | 21/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4729 ||:  20%|##        | 22/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4754 ||:  21%|##        | 23/110 [00:15<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4795 ||:  22%|##1       | 24/110 [00:16<00:58,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4770 ||:  23%|##2       | 25/110 [00:16<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4776 ||:  24%|##3       | 26/110 [00:17<00:57,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4776 ||:  25%|##4       | 27/110 [00:18<01:02,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4775 ||:  25%|##5       | 28/110 [00:19<01:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4798 ||:  26%|##6       | 29/110 [00:19<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4778 ||:  27%|##7       | 30/110 [00:21<01:19,  1.01it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4799 ||:  28%|##8       | 31/110 [00:22<01:11,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4819 ||:  29%|##9       | 32/110 [00:22<01:05,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4823 ||:  30%|###       | 33/110 [00:23<01:01,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4790 ||:  31%|###       | 34/110 [00:24<00:56,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4775 ||:  32%|###1      | 35/110 [00:24<00:53,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4790 ||:  33%|###2      | 36/110 [00:25<00:53,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4798 ||:  34%|###3      | 37/110 [00:26<00:52,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4822 ||:  35%|###4      | 38/110 [00:27<00:51,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4796 ||:  35%|###5      | 39/110 [00:27<00:51,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4824 ||:  36%|###6      | 40/110 [00:28<00:49,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4826 ||:  37%|###7      | 41/110 [00:29<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4838 ||:  38%|###8      | 42/110 [00:29<00:46,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4829 ||:  39%|###9      | 43/110 [00:30<00:47,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4825 ||:  40%|####      | 44/110 [00:31<00:45,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4822 ||:  41%|####      | 45/110 [00:31<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4829 ||:  42%|####1     | 46/110 [00:32<00:41,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4859 ||:  43%|####2     | 47/110 [00:33<00:41,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4882 ||:  44%|####3     | 48/110 [00:33<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4890 ||:  45%|####4     | 49/110 [00:34<00:41,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4881 ||:  45%|####5     | 50/110 [00:35<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4867 ||:  46%|####6     | 51/110 [00:35<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4869 ||:  47%|####7     | 52/110 [00:36<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4861 ||:  48%|####8     | 53/110 [00:37<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4870 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4860 ||:  50%|#####     | 55/110 [00:38<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4860 ||:  51%|#####     | 56/110 [00:39<00:39,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4839 ||:  52%|#####1    | 57/110 [00:40<00:36,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4824 ||:  53%|#####2    | 58/110 [00:40<00:36,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4839 ||:  54%|#####3    | 59/110 [00:41<00:35,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4840 ||:  55%|#####4    | 60/110 [00:42<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4827 ||:  55%|#####5    | 61/110 [00:43<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4826 ||:  56%|#####6    | 62/110 [00:43<00:35,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4805 ||:  57%|#####7    | 63/110 [00:44<00:33,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4805 ||:  58%|#####8    | 64/110 [00:45<00:31,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4818 ||:  59%|#####9    | 65/110 [00:45<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4824 ||:  60%|######    | 66/110 [00:46<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4824 ||:  61%|######    | 67/110 [00:47<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4826 ||:  62%|######1   | 68/110 [00:47<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4822 ||:  63%|######2   | 69/110 [00:48<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4820 ||:  64%|######3   | 70/110 [00:49<00:29,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4847 ||:  65%|######4   | 71/110 [00:50<00:28,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4837 ||:  65%|######5   | 72/110 [00:50<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4837 ||:  66%|######6   | 73/110 [00:51<00:25,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4846 ||:  67%|######7   | 74/110 [00:52<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4831 ||:  68%|######8   | 75/110 [00:52<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4831 ||:  69%|######9   | 76/110 [00:53<00:24,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4827 ||:  70%|#######   | 77/110 [00:54<00:22,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4828 ||:  71%|#######   | 78/110 [00:54<00:22,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4826 ||:  72%|#######1  | 79/110 [00:55<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4845 ||:  73%|#######2  | 80/110 [00:56<00:23,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4841 ||:  74%|#######3  | 81/110 [00:57<00:21,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4842 ||:  75%|#######4  | 82/110 [00:58<00:21,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4847 ||:  75%|#######5  | 83/110 [00:58<00:20,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4848 ||:  76%|#######6  | 84/110 [00:59<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4854 ||:  77%|#######7  | 85/110 [01:00<00:19,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4846 ||:  78%|#######8  | 86/110 [01:00<00:17,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4830 ||:  79%|#######9  | 87/110 [01:01<00:16,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4849 ||:  80%|########  | 88/110 [01:02<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4842 ||:  81%|########  | 89/110 [01:02<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4839 ||:  82%|########1 | 90/110 [01:03<00:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4856 ||:  83%|########2 | 91/110 [01:04<00:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4856 ||:  84%|########3 | 92/110 [01:05<00:13,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4840 ||:  85%|########4 | 93/110 [01:06<00:12,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4837 ||:  85%|########5 | 94/110 [01:06<00:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4832 ||:  86%|########6 | 95/110 [01:07<00:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4828 ||:  87%|########7 | 96/110 [01:08<00:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4838 ||:  88%|########8 | 97/110 [01:08<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4843 ||:  89%|########9 | 98/110 [01:09<00:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4838 ||:  90%|######### | 99/110 [01:10<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4841 ||:  91%|######### | 100/110 [01:10<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4839 ||:  92%|#########1| 101/110 [01:11<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4847 ||:  93%|#########2| 102/110 [01:12<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4853 ||:  94%|#########3| 103/110 [01:13<00:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4846 ||:  95%|#########4| 104/110 [01:13<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4847 ||:  95%|#########5| 105/110 [01:14<00:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4849 ||:  96%|#########6| 106/110 [01:15<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4856 ||:  97%|#########7| 107/110 [01:15<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4853 ||:  98%|#########8| 108/110 [01:16<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4856 ||:  99%|#########9| 109/110 [01:17<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4853 ||: 100%|##########| 110/110 [01:17<00:00,  1.67it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4853 ||: 100%|##########| 110/110 [01:17<00:00,  1.42it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5043, acc: 0.6875, no_result: 0.0312, loss: 0.6488 ||:   4%|4         | 1/24 [00:00<00:08,  2.76it/s]
+BLEU: 0.5399, acc: 0.7500, no_result: 0.0312, loss: 0.5925 ||:   8%|8         | 2/24 [00:00<00:07,  2.86it/s]
+BLEU: 0.5045, acc: 0.6875, no_result: 0.0729, loss: 0.6569 ||:  12%|#2        | 3/24 [00:01<00:07,  2.90it/s]
+BLEU: 0.5515, acc: 0.6719, no_result: 0.0625, loss: 0.6612 ||:  17%|#6        | 4/24 [00:01<00:07,  2.81it/s]
+BLEU: 0.6086, acc: 0.6875, no_result: 0.0688, loss: 0.6333 ||:  21%|##        | 5/24 [00:01<00:06,  2.79it/s]
+BLEU: 0.6165, acc: 0.6510, no_result: 0.0729, loss: 0.6502 ||:  25%|##5       | 6/24 [00:02<00:06,  2.90it/s]
+BLEU: 0.6125, acc: 0.6518, no_result: 0.0714, loss: 0.6409 ||:  29%|##9       | 7/24 [00:02<00:05,  3.00it/s]
+BLEU: 0.6180, acc: 0.6484, no_result: 0.0781, loss: 0.6535 ||:  33%|###3      | 8/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6111, acc: 0.6528, no_result: 0.0764, loss: 0.6454 ||:  38%|###7      | 9/24 [00:03<00:05,  2.94it/s]
+BLEU: 0.6178, acc: 0.6406, no_result: 0.0688, loss: 0.6564 ||:  42%|####1     | 10/24 [00:03<00:04,  2.90it/s]
+BLEU: 0.6154, acc: 0.6250, no_result: 0.0682, loss: 0.6771 ||:  46%|####5     | 11/24 [00:03<00:04,  2.77it/s]
+BLEU: 0.6076, acc: 0.6302, no_result: 0.0625, loss: 0.6776 ||:  50%|#####     | 12/24 [00:04<00:04,  2.87it/s]
+BLEU: 0.5924, acc: 0.6250, no_result: 0.0649, loss: 0.7032 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.76it/s]
+BLEU: 0.5930, acc: 0.6116, no_result: 0.0670, loss: 0.7190 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.67it/s]
+BLEU: 0.5671, acc: 0.5938, no_result: 0.0854, loss: 0.7601 ||:  62%|######2   | 15/24 [00:05<00:03,  2.46it/s]
+BLEU: 0.5534, acc: 0.5742, no_result: 0.0957, loss: 0.7796 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.5475, acc: 0.5717, no_result: 0.1011, loss: 0.7794 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.5469, acc: 0.5729, no_result: 0.1024, loss: 0.7815 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.5542, acc: 0.5806, no_result: 0.1003, loss: 0.7653 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.5562, acc: 0.5844, no_result: 0.0969, loss: 0.7637 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.5533, acc: 0.5759, no_result: 0.0952, loss: 0.7633 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.5579, acc: 0.5668, no_result: 0.0952, loss: 0.7623 ||:  92%|#########1| 22/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.5607, acc: 0.5761, no_result: 0.0910, loss: 0.7522 ||:  96%|#########5| 23/24 [00:08<00:00,  2.76it/s]
+BLEU: 0.5600, acc: 0.5706, no_result: 0.0919, loss: 0.7641 ||: 100%|##########| 24/24 [00:08<00:00,  3.41it/s]
+BLEU: 0.5600, acc: 0.5706, no_result: 0.0919, loss: 0.7641 ||: 100%|##########| 24/24 [00:08<00:00,  2.79it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4724 ||:   1%|          | 1/110 [00:00<01:20,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5133 ||:   2%|1         | 2/110 [00:01<01:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4808 ||:   3%|2         | 3/110 [00:02<01:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.5009 ||:   4%|3         | 4/110 [00:02<01:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4766 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4621 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4700 ||:   6%|6         | 7/110 [00:04<01:06,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4671 ||:   7%|7         | 8/110 [00:05<01:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4739 ||:   8%|8         | 9/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4709 ||:   9%|9         | 10/110 [00:06<01:04,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4665 ||:  10%|#         | 11/110 [00:07<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4677 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4595 ||:  12%|#1        | 13/110 [00:08<01:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4606 ||:  13%|#2        | 14/110 [00:09<01:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4613 ||:  14%|#3        | 15/110 [00:10<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4619 ||:  15%|#4        | 16/110 [00:11<01:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4599 ||:  15%|#5        | 17/110 [00:11<01:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4610 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4669 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4675 ||:  18%|#8        | 20/110 [00:14<01:31,  1.02s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.4691 ||:  19%|#9        | 21/110 [00:15<01:23,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4680 ||:  20%|##        | 22/110 [00:16<01:17,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4706 ||:  21%|##        | 23/110 [00:17<01:12,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4674 ||:  22%|##1       | 24/110 [00:17<01:07,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4691 ||:  23%|##2       | 25/110 [00:18<01:03,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4662 ||:  24%|##3       | 26/110 [00:19<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4656 ||:  25%|##4       | 27/110 [00:19<01:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4668 ||:  25%|##5       | 28/110 [00:20<00:58,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4669 ||:  26%|##6       | 29/110 [00:21<00:55,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4647 ||:  27%|##7       | 30/110 [00:21<00:58,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4687 ||:  28%|##8       | 31/110 [00:22<00:57,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4685 ||:  29%|##9       | 32/110 [00:23<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4714 ||:  30%|###       | 33/110 [00:24<00:58,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4733 ||:  31%|###       | 34/110 [00:24<00:56,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4741 ||:  32%|###1      | 35/110 [00:25<00:54,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4724 ||:  33%|###2      | 36/110 [00:26<00:57,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4706 ||:  34%|###3      | 37/110 [00:27<00:54,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4682 ||:  35%|###4      | 38/110 [00:27<00:54,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4714 ||:  35%|###5      | 39/110 [00:28<00:54,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4747 ||:  36%|###6      | 40/110 [00:29<00:52,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4756 ||:  37%|###7      | 41/110 [00:30<00:49,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4782 ||:  38%|###8      | 42/110 [00:30<00:48,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4771 ||:  39%|###9      | 43/110 [00:31<00:47,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4776 ||:  40%|####      | 44/110 [00:32<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4806 ||:  41%|####      | 45/110 [00:32<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4797 ||:  42%|####1     | 46/110 [00:33<00:46,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4805 ||:  43%|####2     | 47/110 [00:34<00:47,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4800 ||:  44%|####3     | 48/110 [00:35<00:45,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4776 ||:  45%|####4     | 49/110 [00:35<00:43,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4761 ||:  45%|####5     | 50/110 [00:36<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4761 ||:  46%|####6     | 51/110 [00:37<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4737 ||:  47%|####7     | 52/110 [00:37<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4748 ||:  48%|####8     | 53/110 [00:38<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4749 ||:  49%|####9     | 54/110 [00:39<00:39,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4729 ||:  50%|#####     | 55/110 [00:39<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4723 ||:  51%|#####     | 56/110 [00:40<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4706 ||:  52%|#####1    | 57/110 [00:41<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4697 ||:  53%|#####2    | 58/110 [00:42<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4687 ||:  54%|#####3    | 59/110 [00:42<00:36,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4702 ||:  55%|#####4    | 60/110 [00:43<00:35,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4697 ||:  55%|#####5    | 61/110 [00:44<00:35,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4692 ||:  56%|#####6    | 62/110 [00:45<00:35,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4680 ||:  57%|#####7    | 63/110 [00:45<00:35,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4688 ||:  58%|#####8    | 64/110 [00:46<00:35,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4701 ||:  59%|#####9    | 65/110 [00:47<00:34,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4694 ||:  60%|######    | 66/110 [00:48<00:33,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4701 ||:  61%|######    | 67/110 [00:48<00:32,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4698 ||:  62%|######1   | 68/110 [00:49<00:30,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4702 ||:  63%|######2   | 69/110 [00:50<00:29,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4701 ||:  64%|######3   | 70/110 [00:50<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4694 ||:  65%|######4   | 71/110 [00:51<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4695 ||:  65%|######5   | 72/110 [00:52<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4689 ||:  66%|######6   | 73/110 [00:53<00:27,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4683 ||:  67%|######7   | 74/110 [00:53<00:25,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4662 ||:  68%|######8   | 75/110 [00:54<00:25,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4668 ||:  69%|######9   | 76/110 [00:55<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4670 ||:  70%|#######   | 77/110 [00:56<00:23,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4665 ||:  71%|#######   | 78/110 [00:56<00:24,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4662 ||:  72%|#######1  | 79/110 [00:57<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4658 ||:  73%|#######2  | 80/110 [00:58<00:23,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4662 ||:  74%|#######3  | 81/110 [00:59<00:21,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4680 ||:  75%|#######4  | 82/110 [00:59<00:20,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4685 ||:  75%|#######5  | 83/110 [01:00<00:20,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4682 ||:  76%|#######6  | 84/110 [01:01<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4687 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4691 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4684 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4682 ||:  80%|########  | 88/110 [01:04<00:16,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4673 ||:  81%|########  | 89/110 [01:04<00:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4672 ||:  82%|########1 | 90/110 [01:05<00:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4679 ||:  83%|########2 | 91/110 [01:06<00:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4670 ||:  84%|########3 | 92/110 [01:06<00:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4673 ||:  85%|########4 | 93/110 [01:07<00:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4674 ||:  85%|########5 | 94/110 [01:08<00:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4678 ||:  86%|########6 | 95/110 [01:09<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4675 ||:  87%|########7 | 96/110 [01:09<00:09,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4672 ||:  88%|########8 | 97/110 [01:10<00:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4677 ||:  89%|########9 | 98/110 [01:11<00:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4676 ||:  90%|######### | 99/110 [01:11<00:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4672 ||:  91%|######### | 100/110 [01:12<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4669 ||:  92%|#########1| 101/110 [01:13<00:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4671 ||:  93%|#########2| 102/110 [01:14<00:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4679 ||:  94%|#########3| 103/110 [01:14<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4680 ||:  95%|#########4| 104/110 [01:15<00:04,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4673 ||:  95%|#########5| 105/110 [01:16<00:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4677 ||:  96%|#########6| 106/110 [01:17<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4680 ||:  97%|#########7| 107/110 [01:17<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4681 ||:  98%|#########8| 108/110 [01:18<00:01,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4679 ||:  99%|#########9| 109/110 [01:19<00:00,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4691 ||: 100%|##########| 110/110 [01:19<00:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4691 ||: 100%|##########| 110/110 [01:19<00:00,  1.38it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5156, acc: 0.6875, no_result: 0.0625, loss: 0.6699 ||:   4%|4         | 1/24 [00:00<00:09,  2.43it/s]
+BLEU: 0.4991, acc: 0.7031, no_result: 0.0625, loss: 0.6205 ||:   8%|8         | 2/24 [00:00<00:08,  2.48it/s]
+BLEU: 0.4908, acc: 0.6562, no_result: 0.1146, loss: 0.6999 ||:  12%|#2        | 3/24 [00:01<00:08,  2.48it/s]
+BLEU: 0.5486, acc: 0.6484, no_result: 0.1016, loss: 0.6918 ||:  17%|#6        | 4/24 [00:01<00:08,  2.45it/s]
+BLEU: 0.5899, acc: 0.6687, no_result: 0.1000, loss: 0.6459 ||:  21%|##        | 5/24 [00:02<00:07,  2.45it/s]
+BLEU: 0.5846, acc: 0.6510, no_result: 0.0938, loss: 0.6533 ||:  25%|##5       | 6/24 [00:02<00:07,  2.53it/s]
+BLEU: 0.5807, acc: 0.6473, no_result: 0.0848, loss: 0.6491 ||:  29%|##9       | 7/24 [00:02<00:06,  2.65it/s]
+BLEU: 0.5755, acc: 0.6445, no_result: 0.0859, loss: 0.6563 ||:  33%|###3      | 8/24 [00:03<00:05,  2.68it/s]
+BLEU: 0.5663, acc: 0.6389, no_result: 0.0868, loss: 0.6603 ||:  38%|###7      | 9/24 [00:03<00:05,  2.62it/s]
+BLEU: 0.5779, acc: 0.6219, no_result: 0.0875, loss: 0.6681 ||:  42%|####1     | 10/24 [00:03<00:05,  2.54it/s]
+BLEU: 0.5859, acc: 0.6108, no_result: 0.0938, loss: 0.6936 ||:  46%|####5     | 11/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.5806, acc: 0.6172, no_result: 0.0885, loss: 0.6888 ||:  50%|#####     | 12/24 [00:04<00:04,  2.49it/s]
+BLEU: 0.5658, acc: 0.6106, no_result: 0.0962, loss: 0.7104 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.38it/s]
+BLEU: 0.5663, acc: 0.5960, no_result: 0.1049, loss: 0.7304 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.25it/s]
+BLEU: 0.5512, acc: 0.5792, no_result: 0.1208, loss: 0.7705 ||:  62%|######2   | 15/24 [00:06<00:04,  2.12it/s]
+BLEU: 0.5446, acc: 0.5605, no_result: 0.1289, loss: 0.7864 ||:  67%|######6   | 16/24 [00:06<00:03,  2.07it/s]
+BLEU: 0.5466, acc: 0.5607, no_result: 0.1324, loss: 0.7829 ||:  71%|#######   | 17/24 [00:07<00:03,  2.14it/s]
+BLEU: 0.5442, acc: 0.5642, no_result: 0.1319, loss: 0.7874 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.13it/s]
+BLEU: 0.5508, acc: 0.5724, no_result: 0.1283, loss: 0.7732 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.22it/s]
+BLEU: 0.5503, acc: 0.5781, no_result: 0.1234, loss: 0.7738 ||:  83%|########3 | 20/24 [00:08<00:01,  2.35it/s]
+BLEU: 0.5483, acc: 0.5685, no_result: 0.1235, loss: 0.7757 ||:  88%|########7 | 21/24 [00:08<00:01,  2.34it/s]
+BLEU: 0.5505, acc: 0.5639, no_result: 0.1250, loss: 0.7696 ||:  92%|#########1| 22/24 [00:09<00:00,  2.29it/s]
+BLEU: 0.5512, acc: 0.5707, no_result: 0.1223, loss: 0.7595 ||:  96%|#########5| 23/24 [00:09<00:00,  2.38it/s]
+BLEU: 0.5505, acc: 0.5654, no_result: 0.1218, loss: 0.7727 ||: 100%|##########| 24/24 [00:09<00:00,  2.92it/s]
+BLEU: 0.5505, acc: 0.5654, no_result: 0.1218, loss: 0.7727 ||: 100%|##########| 24/24 [00:09<00:00,  2.43it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4378 ||:   1%|          | 1/110 [00:00<01:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4588 ||:   2%|1         | 2/110 [00:01<01:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4554 ||:   3%|2         | 3/110 [00:02<01:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4550 ||:   4%|3         | 4/110 [00:02<01:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4358 ||:   5%|4         | 5/110 [00:03<01:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4310 ||:   5%|5         | 6/110 [00:04<01:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4205 ||:   6%|6         | 7/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4403 ||:   7%|7         | 8/110 [00:05<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4303 ||:   8%|8         | 9/110 [00:06<01:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4345 ||:   9%|9         | 10/110 [00:07<01:35,  1.05it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4384 ||:  10%|#         | 11/110 [00:08<01:27,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4433 ||:  11%|#         | 12/110 [00:09<01:18,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4426 ||:  12%|#1        | 13/110 [00:09<01:12,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4441 ||:  13%|#2        | 14/110 [00:10<01:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4403 ||:  14%|#3        | 15/110 [00:11<01:13,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4388 ||:  15%|#4        | 16/110 [00:11<01:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4418 ||:  15%|#5        | 17/110 [00:12<01:10,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4436 ||:  16%|#6        | 18/110 [00:13<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4416 ||:  17%|#7        | 19/110 [00:13<01:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4398 ||:  18%|#8        | 20/110 [00:14<01:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4375 ||:  19%|#9        | 21/110 [00:15<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4352 ||:  20%|##        | 22/110 [00:15<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4327 ||:  21%|##        | 23/110 [00:16<00:55,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4357 ||:  22%|##1       | 24/110 [00:17<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4331 ||:  23%|##2       | 25/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4351 ||:  24%|##3       | 26/110 [00:18<00:58,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4325 ||:  25%|##4       | 27/110 [00:19<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4335 ||:  25%|##5       | 28/110 [00:19<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4317 ||:  26%|##6       | 29/110 [00:20<00:52,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4337 ||:  27%|##7       | 30/110 [00:21<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4317 ||:  28%|##8       | 31/110 [00:21<00:49,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4292 ||:  29%|##9       | 32/110 [00:22<00:50,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4323 ||:  30%|###       | 33/110 [00:23<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4333 ||:  31%|###       | 34/110 [00:23<00:49,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4348 ||:  32%|###1      | 35/110 [00:24<00:48,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4356 ||:  33%|###2      | 36/110 [00:25<00:47,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4365 ||:  34%|###3      | 37/110 [00:25<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4362 ||:  35%|###4      | 38/110 [00:26<00:45,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4343 ||:  35%|###5      | 39/110 [00:27<00:46,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4327 ||:  36%|###6      | 40/110 [00:27<00:44,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4335 ||:  37%|###7      | 41/110 [00:28<00:44,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4317 ||:  38%|###8      | 42/110 [00:28<00:45,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4318 ||:  39%|###9      | 43/110 [00:29<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4312 ||:  40%|####      | 44/110 [00:30<00:47,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4333 ||:  41%|####      | 45/110 [00:31<00:47,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4326 ||:  42%|####1     | 46/110 [00:31<00:44,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4329 ||:  43%|####2     | 47/110 [00:32<00:44,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4324 ||:  44%|####3     | 48/110 [00:33<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4306 ||:  45%|####4     | 49/110 [00:34<00:42,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4295 ||:  45%|####5     | 50/110 [00:34<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4291 ||:  46%|####6     | 51/110 [00:35<00:40,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4291 ||:  47%|####7     | 52/110 [00:35<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4277 ||:  48%|####8     | 53/110 [00:36<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4302 ||:  49%|####9     | 54/110 [00:37<00:36,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4299 ||:  50%|#####     | 55/110 [00:37<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4295 ||:  51%|#####     | 56/110 [00:38<00:34,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4311 ||:  52%|#####1    | 57/110 [00:39<00:33,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4312 ||:  53%|#####2    | 58/110 [00:39<00:32,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4308 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4300 ||:  55%|#####4    | 60/110 [00:41<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4305 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4322 ||:  56%|#####6    | 62/110 [00:42<00:30,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4333 ||:  57%|#####7    | 63/110 [00:43<00:31,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4363 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4361 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4354 ||:  60%|######    | 66/110 [00:45<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4351 ||:  61%|######    | 67/110 [00:45<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4353 ||:  62%|######1   | 68/110 [00:46<00:27,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4341 ||:  63%|######2   | 69/110 [00:47<00:26,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4365 ||:  64%|######3   | 70/110 [00:47<00:25,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4373 ||:  65%|######4   | 71/110 [00:48<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4363 ||:  65%|######5   | 72/110 [00:49<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4354 ||:  66%|######6   | 73/110 [00:49<00:23,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4358 ||:  67%|######7   | 74/110 [00:50<00:23,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4365 ||:  68%|######8   | 75/110 [00:50<00:21,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4349 ||:  69%|######9   | 76/110 [00:51<00:21,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4346 ||:  70%|#######   | 77/110 [00:52<00:21,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4353 ||:  71%|#######   | 78/110 [00:53<00:23,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4368 ||:  72%|#######1  | 79/110 [00:53<00:22,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4384 ||:  73%|#######2  | 80/110 [00:54<00:21,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4385 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4391 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4405 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4408 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4417 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4423 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4426 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4425 ||:  80%|########  | 88/110 [00:59<00:14,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4425 ||:  81%|########  | 89/110 [01:00<00:13,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4425 ||:  82%|########1 | 90/110 [01:01<00:12,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4432 ||:  83%|########2 | 91/110 [01:01<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4434 ||:  84%|########3 | 92/110 [01:02<00:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4435 ||:  85%|########4 | 93/110 [01:03<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4439 ||:  85%|########5 | 94/110 [01:04<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4441 ||:  86%|########6 | 95/110 [01:04<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4438 ||:  87%|########7 | 96/110 [01:05<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4450 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4453 ||:  89%|########9 | 98/110 [01:06<00:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4451 ||:  90%|######### | 99/110 [01:07<00:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4464 ||:  91%|######### | 100/110 [01:08<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4462 ||:  92%|#########1| 101/110 [01:08<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4456 ||:  93%|#########2| 102/110 [01:09<00:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4455 ||:  94%|#########3| 103/110 [01:10<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4453 ||:  95%|#########4| 104/110 [01:10<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4456 ||:  95%|#########5| 105/110 [01:11<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4445 ||:  96%|#########6| 106/110 [01:12<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4433 ||:  97%|#########7| 107/110 [01:12<00:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4446 ||:  98%|#########8| 108/110 [01:13<00:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4448 ||:  99%|#########9| 109/110 [01:14<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4449 ||: 100%|##########| 110/110 [01:15<00:00,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4449 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.5496, acc: 0.6875, no_result: 0.0312, loss: 0.6504 ||:   4%|4         | 1/24 [00:00<00:08,  2.86it/s]
+BLEU: 0.5254, acc: 0.7188, no_result: 0.0469, loss: 0.6164 ||:   8%|8         | 2/24 [00:00<00:07,  2.95it/s]
+BLEU: 0.5094, acc: 0.6458, no_result: 0.0938, loss: 0.6761 ||:  12%|#2        | 3/24 [00:01<00:07,  2.96it/s]
+BLEU: 0.5242, acc: 0.6406, no_result: 0.0859, loss: 0.6602 ||:  17%|#6        | 4/24 [00:01<00:07,  2.86it/s]
+BLEU: 0.5702, acc: 0.6687, no_result: 0.0813, loss: 0.6379 ||:  21%|##        | 5/24 [00:01<00:06,  2.83it/s]
+BLEU: 0.5701, acc: 0.6302, no_result: 0.0729, loss: 0.6539 ||:  25%|##5       | 6/24 [00:02<00:06,  2.97it/s]
+BLEU: 0.5619, acc: 0.6295, no_result: 0.0759, loss: 0.6521 ||:  29%|##9       | 7/24 [00:02<00:05,  3.04it/s]
+BLEU: 0.5618, acc: 0.6328, no_result: 0.0781, loss: 0.6644 ||:  33%|###3      | 8/24 [00:02<00:05,  3.05it/s]
+BLEU: 0.5564, acc: 0.6389, no_result: 0.0764, loss: 0.6628 ||:  38%|###7      | 9/24 [00:03<00:05,  2.99it/s]
+BLEU: 0.5671, acc: 0.6312, no_result: 0.0688, loss: 0.6695 ||:  42%|####1     | 10/24 [00:03<00:04,  2.94it/s]
+BLEU: 0.5674, acc: 0.6136, no_result: 0.0767, loss: 0.6895 ||:  46%|####5     | 11/24 [00:03<00:04,  2.77it/s]
+BLEU: 0.5685, acc: 0.6198, no_result: 0.0703, loss: 0.6904 ||:  50%|#####     | 12/24 [00:04<00:04,  2.90it/s]
+BLEU: 0.5598, acc: 0.6106, no_result: 0.0745, loss: 0.7140 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.80it/s]
+BLEU: 0.5660, acc: 0.6004, no_result: 0.0737, loss: 0.7291 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.72it/s]
+BLEU: 0.5379, acc: 0.5750, no_result: 0.0917, loss: 0.7691 ||:  62%|######2   | 15/24 [00:05<00:03,  2.54it/s]
+BLEU: 0.5235, acc: 0.5605, no_result: 0.0996, loss: 0.7893 ||:  67%|######6   | 16/24 [00:05<00:03,  2.48it/s]
+BLEU: 0.5249, acc: 0.5625, no_result: 0.1066, loss: 0.7938 ||:  71%|#######   | 17/24 [00:06<00:02,  2.54it/s]
+BLEU: 0.5296, acc: 0.5608, no_result: 0.1094, loss: 0.7988 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.51it/s]
+BLEU: 0.5365, acc: 0.5724, no_result: 0.1069, loss: 0.7818 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.60it/s]
+BLEU: 0.5405, acc: 0.5797, no_result: 0.1016, loss: 0.7793 ||:  83%|########3 | 20/24 [00:07<00:01,  2.75it/s]
+BLEU: 0.5354, acc: 0.5714, no_result: 0.0967, loss: 0.7782 ||:  88%|########7 | 21/24 [00:07<00:01,  2.76it/s]
+BLEU: 0.5355, acc: 0.5653, no_result: 0.0966, loss: 0.7763 ||:  92%|#########1| 22/24 [00:07<00:00,  2.71it/s]
+BLEU: 0.5381, acc: 0.5734, no_result: 0.0938, loss: 0.7678 ||:  96%|#########5| 23/24 [00:08<00:00,  2.80it/s]
+BLEU: 0.5367, acc: 0.5680, no_result: 0.0945, loss: 0.7758 ||: 100%|##########| 24/24 [00:08<00:00,  3.46it/s]
+BLEU: 0.5367, acc: 0.5680, no_result: 0.0945, loss: 0.7758 ||: 100%|##########| 24/24 [00:08<00:00,  2.85it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4810 ||:   1%|          | 1/110 [00:00<01:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4432 ||:   2%|1         | 2/110 [00:01<01:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4354 ||:   3%|2         | 3/110 [00:02<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4446 ||:   4%|3         | 4/110 [00:02<01:20,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4431 ||:   5%|4         | 5/110 [00:03<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4470 ||:   5%|5         | 6/110 [00:04<01:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4465 ||:   6%|6         | 7/110 [00:05<01:17,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4438 ||:   7%|7         | 8/110 [00:05<01:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4497 ||:   8%|8         | 9/110 [00:06<01:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4339 ||:   9%|9         | 10/110 [00:07<01:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4353 ||:  10%|#         | 11/110 [00:07<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4387 ||:  11%|#         | 12/110 [00:08<01:11,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4407 ||:  12%|#1        | 13/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4342 ||:  13%|#2        | 14/110 [00:10<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4297 ||:  14%|#3        | 15/110 [00:10<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4295 ||:  15%|#4        | 16/110 [00:11<01:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4266 ||:  15%|#5        | 17/110 [00:12<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4270 ||:  16%|#6        | 18/110 [00:13<01:07,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4340 ||:  17%|#7        | 19/110 [00:13<01:10,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4305 ||:  18%|#8        | 20/110 [00:14<01:07,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4310 ||:  19%|#9        | 21/110 [00:15<01:04,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4323 ||:  20%|##        | 22/110 [00:16<01:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4316 ||:  21%|##        | 23/110 [00:16<01:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4347 ||:  22%|##1       | 24/110 [00:17<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4345 ||:  23%|##2       | 25/110 [00:18<00:58,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4330 ||:  24%|##3       | 26/110 [00:18<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4329 ||:  25%|##4       | 27/110 [00:19<01:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4342 ||:  25%|##5       | 28/110 [00:20<01:05,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4326 ||:  26%|##6       | 29/110 [00:21<01:03,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4312 ||:  27%|##7       | 30/110 [00:22<01:03,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4288 ||:  28%|##8       | 31/110 [00:22<01:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4291 ||:  29%|##9       | 32/110 [00:23<01:00,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4301 ||:  30%|###       | 33/110 [00:24<00:58,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4301 ||:  31%|###       | 34/110 [00:25<00:57,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4293 ||:  32%|###1      | 35/110 [00:25<00:54,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4295 ||:  33%|###2      | 36/110 [00:26<00:52,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4319 ||:  34%|###3      | 37/110 [00:27<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4299 ||:  35%|###4      | 38/110 [00:27<00:52,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4306 ||:  35%|###5      | 39/110 [00:28<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4320 ||:  36%|###6      | 40/110 [00:29<00:54,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4326 ||:  37%|###7      | 41/110 [00:30<00:53,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4309 ||:  38%|###8      | 42/110 [00:31<00:52,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4313 ||:  39%|###9      | 43/110 [00:31<00:50,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4289 ||:  40%|####      | 44/110 [00:32<00:48,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4329 ||:  41%|####      | 45/110 [00:33<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4336 ||:  42%|####1     | 46/110 [00:33<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4337 ||:  43%|####2     | 47/110 [00:34<00:43,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4345 ||:  44%|####3     | 48/110 [00:35<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4338 ||:  45%|####4     | 49/110 [00:35<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4352 ||:  45%|####5     | 50/110 [00:36<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4349 ||:  46%|####6     | 51/110 [00:37<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4343 ||:  47%|####7     | 52/110 [00:37<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4352 ||:  48%|####8     | 53/110 [00:38<00:40,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4346 ||:  49%|####9     | 54/110 [00:39<00:37,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4329 ||:  50%|#####     | 55/110 [00:39<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4356 ||:  51%|#####     | 56/110 [00:40<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4351 ||:  52%|#####1    | 57/110 [00:41<00:37,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4361 ||:  53%|#####2    | 58/110 [00:42<00:36,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4353 ||:  54%|#####3    | 59/110 [00:42<00:35,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4356 ||:  55%|#####4    | 60/110 [00:43<00:35,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4344 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4350 ||:  56%|#####6    | 62/110 [00:45<00:35,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4347 ||:  57%|#####7    | 63/110 [00:45<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4341 ||:  58%|#####8    | 64/110 [00:46<00:32,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4348 ||:  59%|#####9    | 65/110 [00:47<00:31,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4333 ||:  60%|######    | 66/110 [00:47<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4334 ||:  61%|######    | 67/110 [00:48<00:31,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4341 ||:  62%|######1   | 68/110 [00:49<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4337 ||:  63%|######2   | 69/110 [00:49<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4337 ||:  64%|######3   | 70/110 [00:50<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4328 ||:  65%|######4   | 71/110 [00:51<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4322 ||:  65%|######5   | 72/110 [00:51<00:25,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4315 ||:  66%|######6   | 73/110 [00:52<00:24,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4312 ||:  67%|######7   | 74/110 [00:53<00:24,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4300 ||:  68%|######8   | 75/110 [00:53<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4303 ||:  69%|######9   | 76/110 [00:54<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4299 ||:  70%|#######   | 77/110 [00:55<00:21,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4295 ||:  71%|#######   | 78/110 [00:55<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4315 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4316 ||:  73%|#######2  | 80/110 [00:57<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4327 ||:  74%|#######3  | 81/110 [00:57<00:20,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4333 ||:  75%|#######4  | 82/110 [00:58<00:18,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4339 ||:  75%|#######5  | 83/110 [00:59<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4349 ||:  76%|#######6  | 84/110 [00:59<00:17,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4365 ||:  77%|#######7  | 85/110 [01:00<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4365 ||:  78%|#######8  | 86/110 [01:01<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4374 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4368 ||:  80%|########  | 88/110 [01:02<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4376 ||:  81%|########  | 89/110 [01:03<00:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4370 ||:  82%|########1 | 90/110 [01:04<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4375 ||:  83%|########2 | 91/110 [01:05<00:14,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4373 ||:  84%|########3 | 92/110 [01:05<00:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4362 ||:  85%|########4 | 93/110 [01:06<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4361 ||:  85%|########5 | 94/110 [01:07<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4366 ||:  86%|########6 | 95/110 [01:07<00:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4380 ||:  87%|########7 | 96/110 [01:08<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4377 ||:  88%|########8 | 97/110 [01:09<00:09,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4374 ||:  89%|########9 | 98/110 [01:09<00:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4373 ||:  90%|######### | 99/110 [01:10<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4367 ||:  91%|######### | 100/110 [01:12<00:09,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4370 ||:  92%|#########1| 101/110 [01:13<00:08,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4367 ||:  93%|#########2| 102/110 [01:13<00:06,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4378 ||:  94%|#########3| 103/110 [01:14<00:05,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4381 ||:  95%|#########4| 104/110 [01:15<00:04,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4393 ||:  95%|#########5| 105/110 [01:16<00:04,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4390 ||:  96%|#########6| 106/110 [01:16<00:03,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4393 ||:  97%|#########7| 107/110 [01:17<00:02,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4399 ||:  98%|#########8| 108/110 [01:18<00:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4396 ||:  99%|#########9| 109/110 [01:19<00:00,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4410 ||: 100%|##########| 110/110 [01:19<00:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4410 ||: 100%|##########| 110/110 [01:19<00:00,  1.38it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6409, acc: 0.6875, no_result: 0.0625, loss: 0.5999 ||:   4%|4         | 1/24 [00:00<00:09,  2.53it/s]
+BLEU: 0.5496, acc: 0.6875, no_result: 0.0469, loss: 0.6147 ||:   8%|8         | 2/24 [00:00<00:08,  2.56it/s]
+BLEU: 0.5501, acc: 0.6458, no_result: 0.0729, loss: 0.6946 ||:  12%|#2        | 3/24 [00:01<00:08,  2.54it/s]
+BLEU: 0.5815, acc: 0.6406, no_result: 0.0625, loss: 0.6840 ||:  17%|#6        | 4/24 [00:01<00:08,  2.49it/s]
+BLEU: 0.6180, acc: 0.6687, no_result: 0.0688, loss: 0.6510 ||:  21%|##        | 5/24 [00:02<00:07,  2.46it/s]
+BLEU: 0.6168, acc: 0.6302, no_result: 0.0625, loss: 0.6651 ||:  25%|##5       | 6/24 [00:02<00:07,  2.53it/s]
+BLEU: 0.6217, acc: 0.6384, no_result: 0.0580, loss: 0.6522 ||:  29%|##9       | 7/24 [00:02<00:06,  2.64it/s]
+BLEU: 0.6103, acc: 0.6328, no_result: 0.0625, loss: 0.6629 ||:  33%|###3      | 8/24 [00:03<00:06,  2.65it/s]
+BLEU: 0.5943, acc: 0.6389, no_result: 0.0625, loss: 0.6596 ||:  38%|###7      | 9/24 [00:03<00:05,  2.60it/s]
+BLEU: 0.6075, acc: 0.6375, no_result: 0.0563, loss: 0.6673 ||:  42%|####1     | 10/24 [00:03<00:05,  2.56it/s]
+BLEU: 0.5991, acc: 0.6193, no_result: 0.0653, loss: 0.6893 ||:  46%|####5     | 11/24 [00:04<00:05,  2.43it/s]
+BLEU: 0.5963, acc: 0.6276, no_result: 0.0599, loss: 0.6868 ||:  50%|#####     | 12/24 [00:04<00:04,  2.51it/s]
+BLEU: 0.5811, acc: 0.6202, no_result: 0.0673, loss: 0.7096 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.41it/s]
+BLEU: 0.5835, acc: 0.6094, no_result: 0.0737, loss: 0.7269 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.33it/s]
+BLEU: 0.5646, acc: 0.5917, no_result: 0.0896, loss: 0.7671 ||:  62%|######2   | 15/24 [00:06<00:04,  2.19it/s]
+BLEU: 0.5486, acc: 0.5762, no_result: 0.1035, loss: 0.7871 ||:  67%|######6   | 16/24 [00:06<00:03,  2.12it/s]
+BLEU: 0.5509, acc: 0.5754, no_result: 0.1103, loss: 0.7852 ||:  71%|#######   | 17/24 [00:07<00:03,  2.17it/s]
+BLEU: 0.5507, acc: 0.5747, no_result: 0.1111, loss: 0.7877 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.17it/s]
+BLEU: 0.5595, acc: 0.5839, no_result: 0.1086, loss: 0.7710 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.26it/s]
+BLEU: 0.5612, acc: 0.5906, no_result: 0.1062, loss: 0.7692 ||:  83%|########3 | 20/24 [00:08<00:01,  2.37it/s]
+BLEU: 0.5550, acc: 0.5878, no_result: 0.1042, loss: 0.7687 ||:  88%|########7 | 21/24 [00:08<00:01,  2.38it/s]
+BLEU: 0.5552, acc: 0.5781, no_result: 0.1037, loss: 0.7660 ||:  92%|#########1| 22/24 [00:09<00:00,  2.36it/s]
+BLEU: 0.5589, acc: 0.5829, no_result: 0.1033, loss: 0.7561 ||:  96%|#########5| 23/24 [00:09<00:00,  2.42it/s]
+BLEU: 0.5588, acc: 0.5817, no_result: 0.1036, loss: 0.7639 ||: 100%|##########| 24/24 [00:09<00:00,  2.97it/s]
+BLEU: 0.5588, acc: 0.5817, no_result: 0.1036, loss: 0.7639 ||: 100%|##########| 24/24 [00:09<00:00,  2.46it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3930 ||:   1%|          | 1/110 [00:00<01:31,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3843 ||:   2%|1         | 2/110 [00:01<01:27,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3918 ||:   3%|2         | 3/110 [00:02<01:19,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4114 ||:   4%|3         | 4/110 [00:02<01:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3999 ||:   5%|4         | 5/110 [00:03<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3982 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3984 ||:   6%|6         | 7/110 [00:04<01:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3899 ||:   7%|7         | 8/110 [00:05<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3949 ||:   8%|8         | 9/110 [00:06<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3936 ||:   9%|9         | 10/110 [00:07<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4013 ||:  10%|#         | 11/110 [00:07<01:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4013 ||:  11%|#         | 12/110 [00:08<01:14,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3964 ||:  12%|#1        | 13/110 [00:09<01:12,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3964 ||:  13%|#2        | 14/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3892 ||:  14%|#3        | 15/110 [00:10<01:12,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3842 ||:  15%|#4        | 16/110 [00:11<01:12,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3918 ||:  15%|#5        | 17/110 [00:12<01:10,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3893 ||:  16%|#6        | 18/110 [00:13<01:07,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3907 ||:  17%|#7        | 19/110 [00:13<01:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3885 ||:  18%|#8        | 20/110 [00:14<01:06,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3879 ||:  19%|#9        | 21/110 [00:15<01:04,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3903 ||:  20%|##        | 22/110 [00:15<01:04,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3909 ||:  21%|##        | 23/110 [00:16<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3906 ||:  22%|##1       | 24/110 [00:17<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3901 ||:  23%|##2       | 25/110 [00:17<00:52,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3916 ||:  24%|##3       | 26/110 [00:18<00:53,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3939 ||:  25%|##4       | 27/110 [00:19<00:55,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3927 ||:  25%|##5       | 28/110 [00:19<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3943 ||:  26%|##6       | 29/110 [00:20<00:53,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3941 ||:  27%|##7       | 30/110 [00:21<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3900 ||:  28%|##8       | 31/110 [00:21<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3883 ||:  29%|##9       | 32/110 [00:22<00:49,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3872 ||:  30%|###       | 33/110 [00:22<00:49,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3869 ||:  31%|###       | 34/110 [00:23<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3909 ||:  32%|###1      | 35/110 [00:24<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3913 ||:  33%|###2      | 36/110 [00:25<00:51,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3934 ||:  34%|###3      | 37/110 [00:25<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3944 ||:  35%|###4      | 38/110 [00:26<00:45,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3962 ||:  35%|###5      | 39/110 [00:26<00:44,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3956 ||:  36%|###6      | 40/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3961 ||:  37%|###7      | 41/110 [00:28<00:47,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3947 ||:  38%|###8      | 42/110 [00:29<00:49,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3954 ||:  39%|###9      | 43/110 [00:29<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3949 ||:  40%|####      | 44/110 [00:30<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3957 ||:  41%|####      | 45/110 [00:31<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3940 ||:  42%|####1     | 46/110 [00:31<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3964 ||:  43%|####2     | 47/110 [00:32<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3971 ||:  44%|####3     | 48/110 [00:33<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3984 ||:  45%|####4     | 49/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3988 ||:  45%|####5     | 50/110 [00:34<00:39,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4000 ||:  46%|####6     | 51/110 [00:34<00:37,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4002 ||:  47%|####7     | 52/110 [00:35<00:36,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3995 ||:  48%|####8     | 53/110 [00:36<00:36,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3983 ||:  49%|####9     | 54/110 [00:37<00:37,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4007 ||:  50%|#####     | 55/110 [00:37<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4007 ||:  51%|#####     | 56/110 [00:38<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4027 ||:  52%|#####1    | 57/110 [00:39<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4054 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4065 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4059 ||:  55%|#####4    | 60/110 [00:40<00:31,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4060 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4053 ||:  56%|#####6    | 62/110 [00:42<00:30,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4062 ||:  57%|#####7    | 63/110 [00:42<00:29,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4065 ||:  58%|#####8    | 64/110 [00:43<00:28,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4068 ||:  59%|#####9    | 65/110 [00:44<00:28,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4070 ||:  60%|######    | 66/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4077 ||:  61%|######    | 67/110 [00:45<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4066 ||:  62%|######1   | 68/110 [00:46<00:26,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4064 ||:  63%|######2   | 69/110 [00:46<00:26,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4054 ||:  64%|######3   | 70/110 [00:47<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4053 ||:  65%|######4   | 71/110 [00:48<00:25,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4065 ||:  65%|######5   | 72/110 [00:48<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4057 ||:  66%|######6   | 73/110 [00:49<00:26,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4063 ||:  67%|######7   | 74/110 [00:50<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4056 ||:  68%|######8   | 75/110 [00:50<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4048 ||:  69%|######9   | 76/110 [00:51<00:22,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4057 ||:  70%|#######   | 77/110 [00:52<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4063 ||:  71%|#######   | 78/110 [00:52<00:20,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4062 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4066 ||:  73%|#######2  | 80/110 [00:53<00:18,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4064 ||:  74%|#######3  | 81/110 [00:54<00:17,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4055 ||:  75%|#######4  | 82/110 [00:55<00:17,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4049 ||:  75%|#######5  | 83/110 [00:55<00:16,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4047 ||:  76%|#######6  | 84/110 [00:56<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4060 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4064 ||:  78%|#######8  | 86/110 [00:57<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4060 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4055 ||:  80%|########  | 88/110 [00:59<00:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4063 ||:  81%|########  | 89/110 [00:59<00:14,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4064 ||:  82%|########1 | 90/110 [01:01<00:17,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4067 ||:  83%|########2 | 91/110 [01:02<00:15,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4071 ||:  84%|########3 | 92/110 [01:02<00:14,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4075 ||:  85%|########4 | 93/110 [01:03<00:12,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4079 ||:  85%|########5 | 94/110 [01:04<00:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4082 ||:  86%|########6 | 95/110 [01:04<00:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4079 ||:  87%|########7 | 96/110 [01:05<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4080 ||:  88%|########8 | 97/110 [01:06<00:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4091 ||:  89%|########9 | 98/110 [01:06<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4092 ||:  90%|######### | 99/110 [01:07<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4092 ||:  91%|######### | 100/110 [01:08<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4102 ||:  92%|#########1| 101/110 [01:08<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4102 ||:  93%|#########2| 102/110 [01:09<00:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4105 ||:  94%|#########3| 103/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4108 ||:  95%|#########4| 104/110 [01:10<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4111 ||:  95%|#########5| 105/110 [01:11<00:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4107 ||:  96%|#########6| 106/110 [01:12<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4109 ||:  97%|#########7| 107/110 [01:12<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4106 ||:  98%|#########8| 108/110 [01:13<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4118 ||:  99%|#########9| 109/110 [01:14<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4124 ||: 100%|##########| 110/110 [01:14<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.4124 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6914, acc: 0.7500, no_result: 0.0000, loss: 0.6235 ||:   4%|4         | 1/24 [00:00<00:08,  2.78it/s]
+BLEU: 0.6621, acc: 0.7500, no_result: 0.0156, loss: 0.6033 ||:   8%|8         | 2/24 [00:00<00:07,  2.85it/s]
+BLEU: 0.6258, acc: 0.6771, no_result: 0.0729, loss: 0.6686 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.6151, acc: 0.6562, no_result: 0.0703, loss: 0.6649 ||:  17%|#6        | 4/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.6494, acc: 0.6750, no_result: 0.0688, loss: 0.6527 ||:  21%|##        | 5/24 [00:01<00:07,  2.54it/s]
+BLEU: 0.6494, acc: 0.6354, no_result: 0.0677, loss: 0.6718 ||:  25%|##5       | 6/24 [00:02<00:07,  2.55it/s]
+BLEU: 0.6577, acc: 0.6473, no_result: 0.0670, loss: 0.6579 ||:  29%|##9       | 7/24 [00:02<00:06,  2.59it/s]
+BLEU: 0.6505, acc: 0.6523, no_result: 0.0664, loss: 0.6678 ||:  33%|###3      | 8/24 [00:03<00:06,  2.58it/s]
+BLEU: 0.6360, acc: 0.6528, no_result: 0.0694, loss: 0.6656 ||:  38%|###7      | 9/24 [00:03<00:05,  2.50it/s]
+BLEU: 0.6386, acc: 0.6469, no_result: 0.0656, loss: 0.6761 ||:  42%|####1     | 10/24 [00:03<00:05,  2.45it/s]
+BLEU: 0.6325, acc: 0.6278, no_result: 0.0767, loss: 0.7019 ||:  46%|####5     | 11/24 [00:04<00:05,  2.33it/s]
+BLEU: 0.6285, acc: 0.6276, no_result: 0.0755, loss: 0.6986 ||:  50%|#####     | 12/24 [00:04<00:05,  2.39it/s]
+BLEU: 0.6188, acc: 0.6250, no_result: 0.0793, loss: 0.7194 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.32it/s]
+BLEU: 0.6242, acc: 0.6250, no_result: 0.0804, loss: 0.7368 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.23it/s]
+BLEU: 0.6031, acc: 0.6000, no_result: 0.0938, loss: 0.7784 ||:  62%|######2   | 15/24 [00:06<00:04,  2.10it/s]
+BLEU: 0.5917, acc: 0.5918, no_result: 0.1035, loss: 0.7965 ||:  67%|######6   | 16/24 [00:06<00:03,  2.04it/s]
+BLEU: 0.5885, acc: 0.5919, no_result: 0.1103, loss: 0.7963 ||:  71%|#######   | 17/24 [00:07<00:03,  2.10it/s]
+BLEU: 0.5922, acc: 0.5903, no_result: 0.1146, loss: 0.8002 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.08it/s]
+BLEU: 0.5983, acc: 0.5987, no_result: 0.1118, loss: 0.7852 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.18it/s]
+BLEU: 0.6007, acc: 0.6000, no_result: 0.1094, loss: 0.7850 ||:  83%|########3 | 20/24 [00:08<00:01,  2.28it/s]
+BLEU: 0.5952, acc: 0.5908, no_result: 0.1071, loss: 0.7834 ||:  88%|########7 | 21/24 [00:08<00:01,  2.29it/s]
+BLEU: 0.5986, acc: 0.5881, no_result: 0.1051, loss: 0.7778 ||:  92%|#########1| 22/24 [00:09<00:00,  2.25it/s]
+BLEU: 0.6028, acc: 0.5938, no_result: 0.1019, loss: 0.7679 ||:  96%|#########5| 23/24 [00:09<00:00,  2.33it/s]
+BLEU: 0.6022, acc: 0.5875, no_result: 0.1023, loss: 0.7746 ||: 100%|##########| 24/24 [00:09<00:00,  2.86it/s]
+BLEU: 0.6022, acc: 0.5875, no_result: 0.1023, loss: 0.7746 ||: 100%|##########| 24/24 [00:09<00:00,  2.41it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3764 ||:   1%|          | 1/110 [00:00<01:24,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3913 ||:   2%|1         | 2/110 [00:01<01:25,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3760 ||:   3%|2         | 3/110 [00:02<01:26,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3797 ||:   4%|3         | 4/110 [00:03<01:18,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3805 ||:   5%|4         | 5/110 [00:03<01:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3950 ||:   5%|5         | 6/110 [00:04<01:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3860 ||:   6%|6         | 7/110 [00:05<01:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3877 ||:   7%|7         | 8/110 [00:05<01:14,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3734 ||:   8%|8         | 9/110 [00:06<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3769 ||:   9%|9         | 10/110 [00:07<01:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3733 ||:  10%|#         | 11/110 [00:07<01:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3801 ||:  11%|#         | 12/110 [00:08<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3795 ||:  12%|#1        | 13/110 [00:09<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3757 ||:  13%|#2        | 14/110 [00:09<01:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3760 ||:  14%|#3        | 15/110 [00:10<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3750 ||:  15%|#4        | 16/110 [00:11<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3771 ||:  15%|#5        | 17/110 [00:11<01:00,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3739 ||:  16%|#6        | 18/110 [00:12<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3829 ||:  17%|#7        | 19/110 [00:13<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3848 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3852 ||:  19%|#9        | 21/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3820 ||:  20%|##        | 22/110 [00:15<00:59,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3847 ||:  21%|##        | 23/110 [00:16<01:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3825 ||:  22%|##1       | 24/110 [00:17<01:07,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3841 ||:  23%|##2       | 25/110 [00:17<01:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3874 ||:  24%|##3       | 26/110 [00:18<01:03,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3867 ||:  25%|##4       | 27/110 [00:19<01:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3858 ||:  25%|##5       | 28/110 [00:19<00:56,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3891 ||:  26%|##6       | 29/110 [00:20<00:54,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3898 ||:  27%|##7       | 30/110 [00:21<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3865 ||:  28%|##8       | 31/110 [00:21<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3881 ||:  29%|##9       | 32/110 [00:22<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3888 ||:  30%|###       | 33/110 [00:23<00:53,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3902 ||:  31%|###       | 34/110 [00:23<00:52,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3894 ||:  32%|###1      | 35/110 [00:24<00:56,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3871 ||:  33%|###2      | 36/110 [00:25<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3884 ||:  34%|###3      | 37/110 [00:26<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3900 ||:  35%|###4      | 38/110 [00:26<00:52,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3871 ||:  35%|###5      | 39/110 [00:27<00:53,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3853 ||:  36%|###6      | 40/110 [00:28<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3856 ||:  37%|###7      | 41/110 [00:29<00:50,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3869 ||:  38%|###8      | 42/110 [00:29<00:52,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3863 ||:  39%|###9      | 43/110 [00:30<00:50,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3846 ||:  40%|####      | 44/110 [00:31<00:48,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3825 ||:  41%|####      | 45/110 [00:32<00:46,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3828 ||:  42%|####1     | 46/110 [00:32<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3843 ||:  43%|####2     | 47/110 [00:33<00:45,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3853 ||:  44%|####3     | 48/110 [00:34<00:46,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3824 ||:  45%|####4     | 49/110 [00:35<00:44,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3836 ||:  45%|####5     | 50/110 [00:35<00:44,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3827 ||:  46%|####6     | 51/110 [00:36<00:44,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3842 ||:  47%|####7     | 52/110 [00:37<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3856 ||:  48%|####8     | 53/110 [00:37<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3875 ||:  49%|####9     | 54/110 [00:38<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3872 ||:  50%|#####     | 55/110 [00:39<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3881 ||:  51%|#####     | 56/110 [00:39<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3888 ||:  52%|#####1    | 57/110 [00:40<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3888 ||:  53%|#####2    | 58/110 [00:41<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3891 ||:  54%|#####3    | 59/110 [00:41<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3905 ||:  55%|#####4    | 60/110 [00:42<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3914 ||:  55%|#####5    | 61/110 [00:43<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3916 ||:  56%|#####6    | 62/110 [00:44<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3908 ||:  57%|#####7    | 63/110 [00:44<00:32,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3908 ||:  58%|#####8    | 64/110 [00:45<00:32,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3897 ||:  59%|#####9    | 65/110 [00:46<00:31,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3900 ||:  60%|######    | 66/110 [00:46<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3895 ||:  61%|######    | 67/110 [00:47<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3886 ||:  62%|######1   | 68/110 [00:48<00:29,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3890 ||:  63%|######2   | 69/110 [00:48<00:28,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3893 ||:  64%|######3   | 70/110 [00:49<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3891 ||:  65%|######4   | 71/110 [00:50<00:27,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3900 ||:  65%|######5   | 72/110 [00:51<00:28,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3901 ||:  66%|######6   | 73/110 [00:51<00:27,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3902 ||:  67%|######7   | 74/110 [00:52<00:26,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3905 ||:  68%|######8   | 75/110 [00:53<00:25,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3911 ||:  69%|######9   | 76/110 [00:53<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3912 ||:  70%|#######   | 77/110 [00:54<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3901 ||:  71%|#######   | 78/110 [00:55<00:20,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3892 ||:  72%|#######1  | 79/110 [00:55<00:19,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3898 ||:  73%|#######2  | 80/110 [00:57<00:28,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3897 ||:  74%|#######3  | 81/110 [00:58<00:25,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3891 ||:  75%|#######4  | 82/110 [00:58<00:22,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3883 ||:  75%|#######5  | 83/110 [00:59<00:20,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3883 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3885 ||:  77%|#######7  | 85/110 [01:00<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3892 ||:  78%|#######8  | 86/110 [01:01<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3890 ||:  79%|#######9  | 87/110 [01:02<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3898 ||:  80%|########  | 88/110 [01:02<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3906 ||:  81%|########  | 89/110 [01:03<00:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3903 ||:  82%|########1 | 90/110 [01:04<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3900 ||:  83%|########2 | 91/110 [01:04<00:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3904 ||:  84%|########3 | 92/110 [01:05<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3906 ||:  85%|########4 | 93/110 [01:06<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3904 ||:  85%|########5 | 94/110 [01:06<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3905 ||:  86%|########6 | 95/110 [01:07<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3899 ||:  87%|########7 | 96/110 [01:08<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3893 ||:  88%|########8 | 97/110 [01:08<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3901 ||:  89%|########9 | 98/110 [01:09<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3905 ||:  90%|######### | 99/110 [01:10<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3901 ||:  91%|######### | 100/110 [01:11<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3900 ||:  92%|#########1| 101/110 [01:11<00:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3906 ||:  93%|#########2| 102/110 [01:12<00:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3901 ||:  94%|#########3| 103/110 [01:13<00:05,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3908 ||:  95%|#########4| 104/110 [01:14<00:04,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3905 ||:  95%|#########5| 105/110 [01:14<00:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3899 ||:  96%|#########6| 106/110 [01:15<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3898 ||:  97%|#########7| 107/110 [01:16<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3898 ||:  98%|#########8| 108/110 [01:16<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3899 ||:  99%|#########9| 109/110 [01:17<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3891 ||: 100%|##########| 110/110 [01:17<00:00,  1.72it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3891 ||: 100%|##########| 110/110 [01:17<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6620, acc: 0.6562, no_result: 0.1250, loss: 0.5942 ||:   4%|4         | 1/24 [00:00<00:08,  2.66it/s]
+BLEU: 0.6175, acc: 0.7188, no_result: 0.0938, loss: 0.5828 ||:   8%|8         | 2/24 [00:00<00:07,  2.76it/s]
+BLEU: 0.5812, acc: 0.6667, no_result: 0.1250, loss: 0.6751 ||:  12%|#2        | 3/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.6236, acc: 0.6719, no_result: 0.1016, loss: 0.6721 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.6604, acc: 0.6937, no_result: 0.0938, loss: 0.6411 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.6586, acc: 0.6562, no_result: 0.0885, loss: 0.6565 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.6652, acc: 0.6696, no_result: 0.0848, loss: 0.6428 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.6602, acc: 0.6641, no_result: 0.0820, loss: 0.6611 ||:  33%|###3      | 8/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.6458, acc: 0.6701, no_result: 0.0799, loss: 0.6544 ||:  38%|###7      | 9/24 [00:03<00:05,  2.92it/s]
+BLEU: 0.6549, acc: 0.6719, no_result: 0.0719, loss: 0.6649 ||:  42%|####1     | 10/24 [00:03<00:04,  2.89it/s]
+BLEU: 0.6511, acc: 0.6619, no_result: 0.0767, loss: 0.6943 ||:  46%|####5     | 11/24 [00:03<00:04,  2.75it/s]
+BLEU: 0.6444, acc: 0.6641, no_result: 0.0703, loss: 0.6887 ||:  50%|#####     | 12/24 [00:04<00:04,  2.85it/s]
+BLEU: 0.6301, acc: 0.6538, no_result: 0.0769, loss: 0.7113 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.6313, acc: 0.6339, no_result: 0.0826, loss: 0.7311 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.64it/s]
+BLEU: 0.6128, acc: 0.6167, no_result: 0.0979, loss: 0.7721 ||:  62%|######2   | 15/24 [00:05<00:03,  2.47it/s]
+BLEU: 0.5988, acc: 0.6016, no_result: 0.1055, loss: 0.7878 ||:  67%|######6   | 16/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.5993, acc: 0.5993, no_result: 0.1103, loss: 0.7834 ||:  71%|#######   | 17/24 [00:06<00:02,  2.48it/s]
+BLEU: 0.6004, acc: 0.6007, no_result: 0.1094, loss: 0.7875 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.6067, acc: 0.6069, no_result: 0.1069, loss: 0.7715 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.6063, acc: 0.6094, no_result: 0.1031, loss: 0.7744 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.6031, acc: 0.6042, no_result: 0.1012, loss: 0.7734 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.6076, acc: 0.6009, no_result: 0.0994, loss: 0.7675 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.6124, acc: 0.6060, no_result: 0.0965, loss: 0.7567 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.6125, acc: 0.5992, no_result: 0.0971, loss: 0.7628 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.6125, acc: 0.5992, no_result: 0.0971, loss: 0.7628 ||: 100%|##########| 24/24 [00:08<00:00,  2.77it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3378 ||:   1%|          | 1/110 [00:00<01:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3173 ||:   2%|1         | 2/110 [00:01<01:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3301 ||:   3%|2         | 3/110 [00:02<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3188 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3218 ||:   5%|4         | 5/110 [00:03<01:18,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3345 ||:   5%|5         | 6/110 [00:04<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3428 ||:   6%|6         | 7/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3352 ||:   7%|7         | 8/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3417 ||:   8%|8         | 9/110 [00:06<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3435 ||:   9%|9         | 10/110 [00:06<01:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3494 ||:  10%|#         | 11/110 [00:07<01:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3578 ||:  11%|#         | 12/110 [00:08<01:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3608 ||:  12%|#1        | 13/110 [00:09<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3613 ||:  13%|#2        | 14/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3621 ||:  14%|#3        | 15/110 [00:10<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3647 ||:  15%|#4        | 16/110 [00:11<01:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3662 ||:  15%|#5        | 17/110 [00:11<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3653 ||:  16%|#6        | 18/110 [00:12<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3652 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3640 ||:  18%|#8        | 20/110 [00:13<00:59,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3645 ||:  19%|#9        | 21/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3631 ||:  20%|##        | 22/110 [00:15<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3612 ||:  21%|##        | 23/110 [00:15<00:57,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3623 ||:  22%|##1       | 24/110 [00:16<00:55,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3581 ||:  23%|##2       | 25/110 [00:17<01:01,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3579 ||:  24%|##3       | 26/110 [00:17<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3616 ||:  25%|##4       | 27/110 [00:18<00:54,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3622 ||:  25%|##5       | 28/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3617 ||:  26%|##6       | 29/110 [00:19<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3598 ||:  27%|##7       | 30/110 [00:20<00:56,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3619 ||:  28%|##8       | 31/110 [00:21<00:54,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3581 ||:  29%|##9       | 32/110 [00:21<00:53,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3575 ||:  30%|###       | 33/110 [00:22<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3578 ||:  31%|###       | 34/110 [00:23<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3584 ||:  32%|###1      | 35/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3574 ||:  33%|###2      | 36/110 [00:24<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3586 ||:  34%|###3      | 37/110 [00:25<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3592 ||:  35%|###4      | 38/110 [00:25<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3571 ||:  35%|###5      | 39/110 [00:26<00:48,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3560 ||:  36%|###6      | 40/110 [00:27<00:46,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3551 ||:  37%|###7      | 41/110 [00:27<00:44,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3559 ||:  38%|###8      | 42/110 [00:28<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3553 ||:  39%|###9      | 43/110 [00:29<00:47,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3556 ||:  40%|####      | 44/110 [00:30<00:48,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3563 ||:  41%|####      | 45/110 [00:30<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3562 ||:  42%|####1     | 46/110 [00:31<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3567 ||:  43%|####2     | 47/110 [00:32<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3560 ||:  44%|####3     | 48/110 [00:32<00:40,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3571 ||:  45%|####4     | 49/110 [00:33<00:39,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3565 ||:  45%|####5     | 50/110 [00:33<00:37,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3568 ||:  46%|####6     | 51/110 [00:34<00:38,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3589 ||:  47%|####7     | 52/110 [00:35<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3588 ||:  48%|####8     | 53/110 [00:36<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3589 ||:  49%|####9     | 54/110 [00:36<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3586 ||:  50%|#####     | 55/110 [00:37<00:39,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3577 ||:  51%|#####     | 56/110 [00:38<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3585 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3569 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3562 ||:  54%|#####3    | 59/110 [00:40<00:35,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3575 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3577 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3593 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3600 ||:  57%|#####7    | 63/110 [00:42<00:30,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3588 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3607 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3611 ||:  60%|######    | 66/110 [00:44<00:28,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3606 ||:  61%|######    | 67/110 [00:45<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3609 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3625 ||:  63%|######2   | 69/110 [00:47<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3637 ||:  64%|######3   | 70/110 [00:48<00:37,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3634 ||:  65%|######4   | 71/110 [00:49<00:36,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3626 ||:  65%|######5   | 72/110 [00:50<00:32,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3619 ||:  66%|######6   | 73/110 [00:50<00:29,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3613 ||:  67%|######7   | 74/110 [00:51<00:28,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3623 ||:  68%|######8   | 75/110 [00:52<00:25,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3613 ||:  69%|######9   | 76/110 [00:52<00:24,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3624 ||:  70%|#######   | 77/110 [00:53<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3616 ||:  71%|#######   | 78/110 [00:54<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3626 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3629 ||:  73%|#######2  | 80/110 [00:55<00:19,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3638 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3632 ||:  75%|#######4  | 82/110 [00:56<00:17,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3649 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3656 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3662 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3672 ||:  78%|#######8  | 86/110 [00:59<00:15,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3674 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3670 ||:  80%|########  | 88/110 [01:00<00:13,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3674 ||:  81%|########  | 89/110 [01:01<00:13,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3669 ||:  82%|########1 | 90/110 [01:01<00:12,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3667 ||:  83%|########2 | 91/110 [01:02<00:11,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3665 ||:  84%|########3 | 92/110 [01:03<00:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3669 ||:  85%|########4 | 93/110 [01:03<00:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3668 ||:  85%|########5 | 94/110 [01:04<00:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3671 ||:  86%|########6 | 95/110 [01:05<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3667 ||:  87%|########7 | 96/110 [01:05<00:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3677 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3674 ||:  89%|########9 | 98/110 [01:07<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3668 ||:  90%|######### | 99/110 [01:07<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3664 ||:  91%|######### | 100/110 [01:08<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3662 ||:  92%|#########1| 101/110 [01:09<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3663 ||:  93%|#########2| 102/110 [01:09<00:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3661 ||:  94%|#########3| 103/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3660 ||:  95%|#########4| 104/110 [01:11<00:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3666 ||:  95%|#########5| 105/110 [01:11<00:03,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3674 ||:  96%|#########6| 106/110 [01:12<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3678 ||:  97%|#########7| 107/110 [01:13<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3678 ||:  98%|#########8| 108/110 [01:13<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3678 ||:  99%|#########9| 109/110 [01:14<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3673 ||: 100%|##########| 110/110 [01:14<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3673 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6870, acc: 0.8125, no_result: 0.0625, loss: 0.6717 ||:   4%|4         | 1/24 [00:00<00:08,  2.72it/s]
+BLEU: 0.6212, acc: 0.7500, no_result: 0.0625, loss: 0.6809 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.5898, acc: 0.6979, no_result: 0.1042, loss: 0.7706 ||:  12%|#2        | 3/24 [00:01<00:07,  2.86it/s]
+BLEU: 0.6230, acc: 0.7031, no_result: 0.0938, loss: 0.7506 ||:  17%|#6        | 4/24 [00:01<00:07,  2.76it/s]
+BLEU: 0.6632, acc: 0.7125, no_result: 0.0875, loss: 0.7173 ||:  21%|##        | 5/24 [00:01<00:06,  2.73it/s]
+BLEU: 0.6633, acc: 0.6667, no_result: 0.0885, loss: 0.7260 ||:  25%|##5       | 6/24 [00:02<00:06,  2.85it/s]
+BLEU: 0.6606, acc: 0.6607, no_result: 0.0848, loss: 0.7095 ||:  29%|##9       | 7/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.6445, acc: 0.6562, no_result: 0.0859, loss: 0.7308 ||:  33%|###3      | 8/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6345, acc: 0.6667, no_result: 0.0799, loss: 0.7231 ||:  38%|###7      | 9/24 [00:03<00:05,  2.98it/s]
+BLEU: 0.6410, acc: 0.6656, no_result: 0.0750, loss: 0.7330 ||:  42%|####1     | 10/24 [00:03<00:04,  2.91it/s]
+BLEU: 0.6417, acc: 0.6364, no_result: 0.0852, loss: 0.7548 ||:  46%|####5     | 11/24 [00:03<00:04,  2.76it/s]
+BLEU: 0.6344, acc: 0.6432, no_result: 0.0807, loss: 0.7562 ||:  50%|#####     | 12/24 [00:04<00:04,  2.87it/s]
+BLEU: 0.6243, acc: 0.6298, no_result: 0.0962, loss: 0.7779 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.6276, acc: 0.6161, no_result: 0.1004, loss: 0.7997 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.6094, acc: 0.5979, no_result: 0.1187, loss: 0.8388 ||:  62%|######2   | 15/24 [00:05<00:03,  2.46it/s]
+BLEU: 0.5929, acc: 0.5840, no_result: 0.1250, loss: 0.8524 ||:  67%|######6   | 16/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.5918, acc: 0.5790, no_result: 0.1324, loss: 0.8450 ||:  71%|#######   | 17/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.5938, acc: 0.5851, no_result: 0.1302, loss: 0.8529 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6008, acc: 0.5954, no_result: 0.1283, loss: 0.8339 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.57it/s]
+BLEU: 0.5997, acc: 0.5984, no_result: 0.1250, loss: 0.8370 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.5945, acc: 0.5938, no_result: 0.1220, loss: 0.8347 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6027, acc: 0.5923, no_result: 0.1207, loss: 0.8286 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.6079, acc: 0.6005, no_result: 0.1168, loss: 0.8151 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.6080, acc: 0.5987, no_result: 0.1166, loss: 0.8231 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.6080, acc: 0.5987, no_result: 0.1166, loss: 0.8231 ||: 100%|##########| 24/24 [00:08<00:00,  2.79it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3381 ||:   1%|          | 1/110 [00:00<01:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3348 ||:   2%|1         | 2/110 [00:01<01:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3177 ||:   3%|2         | 3/110 [00:01<01:07,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3211 ||:   4%|3         | 4/110 [00:02<01:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3214 ||:   5%|4         | 5/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3420 ||:   5%|5         | 6/110 [00:03<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3322 ||:   6%|6         | 7/110 [00:04<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3240 ||:   7%|7         | 8/110 [00:05<01:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3162 ||:   8%|8         | 9/110 [00:05<01:04,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3227 ||:   9%|9         | 10/110 [00:06<01:00,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3230 ||:  10%|#         | 11/110 [00:06<01:01,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3242 ||:  11%|#         | 12/110 [00:07<01:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3234 ||:  12%|#1        | 13/110 [00:08<01:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3287 ||:  13%|#2        | 14/110 [00:08<00:57,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3328 ||:  14%|#3        | 15/110 [00:09<00:59,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3386 ||:  15%|#4        | 16/110 [00:10<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3403 ||:  15%|#5        | 17/110 [00:10<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3460 ||:  16%|#6        | 18/110 [00:11<00:59,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3463 ||:  17%|#7        | 19/110 [00:12<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3468 ||:  18%|#8        | 20/110 [00:12<00:59,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3481 ||:  19%|#9        | 21/110 [00:13<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3434 ||:  20%|##        | 22/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3372 ||:  21%|##        | 23/110 [00:14<01:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3347 ||:  22%|##1       | 24/110 [00:15<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3349 ||:  23%|##2       | 25/110 [00:16<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3372 ||:  24%|##3       | 26/110 [00:16<00:55,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3404 ||:  25%|##4       | 27/110 [00:17<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3383 ||:  25%|##5       | 28/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3385 ||:  26%|##6       | 29/110 [00:18<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3419 ||:  27%|##7       | 30/110 [00:19<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3401 ||:  28%|##8       | 31/110 [00:20<00:57,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3398 ||:  29%|##9       | 32/110 [00:21<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3392 ||:  30%|###       | 33/110 [00:21<00:53,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3382 ||:  31%|###       | 34/110 [00:22<00:54,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3381 ||:  32%|###1      | 35/110 [00:23<00:51,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3369 ||:  33%|###2      | 36/110 [00:23<00:50,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3375 ||:  34%|###3      | 37/110 [00:24<00:52,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3372 ||:  35%|###4      | 38/110 [00:25<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3396 ||:  35%|###5      | 39/110 [00:25<00:50,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3409 ||:  36%|###6      | 40/110 [00:26<00:48,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3380 ||:  37%|###7      | 41/110 [00:27<00:46,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3365 ||:  38%|###8      | 42/110 [00:28<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3363 ||:  39%|###9      | 43/110 [00:28<00:45,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3377 ||:  40%|####      | 44/110 [00:29<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3375 ||:  41%|####      | 45/110 [00:29<00:41,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3392 ||:  42%|####1     | 46/110 [00:30<00:41,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3415 ||:  43%|####2     | 47/110 [00:31<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3412 ||:  44%|####3     | 48/110 [00:31<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3408 ||:  45%|####4     | 49/110 [00:32<00:39,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3426 ||:  45%|####5     | 50/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3427 ||:  46%|####6     | 51/110 [00:33<00:37,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3434 ||:  47%|####7     | 52/110 [00:34<00:37,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3445 ||:  48%|####8     | 53/110 [00:34<00:34,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3443 ||:  49%|####9     | 54/110 [00:35<00:32,  1.71it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3444 ||:  50%|#####     | 55/110 [00:36<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3452 ||:  51%|#####     | 56/110 [00:37<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3448 ||:  52%|#####1    | 57/110 [00:37<00:35,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3465 ||:  53%|#####2    | 58/110 [00:38<00:34,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3466 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3452 ||:  55%|#####4    | 60/110 [00:40<00:46,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3442 ||:  55%|#####5    | 61/110 [00:41<00:42,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3439 ||:  56%|#####6    | 62/110 [00:41<00:38,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3440 ||:  57%|#####7    | 63/110 [00:42<00:35,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3444 ||:  58%|#####8    | 64/110 [00:43<00:35,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3442 ||:  59%|#####9    | 65/110 [00:44<00:35,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3456 ||:  60%|######    | 66/110 [00:44<00:32,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3467 ||:  61%|######    | 67/110 [00:45<00:30,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3468 ||:  62%|######1   | 68/110 [00:46<00:30,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3472 ||:  63%|######2   | 69/110 [00:46<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3487 ||:  64%|######3   | 70/110 [00:47<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3481 ||:  65%|######4   | 71/110 [00:48<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3480 ||:  65%|######5   | 72/110 [00:49<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3488 ||:  66%|######6   | 73/110 [00:49<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3495 ||:  67%|######7   | 74/110 [00:50<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3492 ||:  68%|######8   | 75/110 [00:50<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3492 ||:  69%|######9   | 76/110 [00:51<00:21,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3500 ||:  70%|#######   | 77/110 [00:52<00:21,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3500 ||:  71%|#######   | 78/110 [00:52<00:20,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3506 ||:  72%|#######1  | 79/110 [00:53<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3506 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3508 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3513 ||:  75%|#######4  | 82/110 [00:55<00:17,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3514 ||:  75%|#######5  | 83/110 [00:56<00:16,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3512 ||:  76%|#######6  | 84/110 [00:56<00:15,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3524 ||:  77%|#######7  | 85/110 [00:57<00:15,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3518 ||:  78%|#######8  | 86/110 [00:57<00:15,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3523 ||:  79%|#######9  | 87/110 [00:58<00:14,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3533 ||:  80%|########  | 88/110 [00:59<00:14,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3533 ||:  81%|########  | 89/110 [00:59<00:14,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3548 ||:  82%|########1 | 90/110 [01:00<00:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3543 ||:  83%|########2 | 91/110 [01:01<00:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3546 ||:  84%|########3 | 92/110 [01:01<00:11,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3551 ||:  85%|########4 | 93/110 [01:02<00:10,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3556 ||:  85%|########5 | 94/110 [01:03<00:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3559 ||:  86%|########6 | 95/110 [01:03<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3550 ||:  87%|########7 | 96/110 [01:04<00:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3547 ||:  88%|########8 | 97/110 [01:05<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3560 ||:  89%|########9 | 98/110 [01:05<00:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3562 ||:  90%|######### | 99/110 [01:06<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3557 ||:  91%|######### | 100/110 [01:07<00:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3560 ||:  92%|#########1| 101/110 [01:07<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3569 ||:  93%|#########2| 102/110 [01:08<00:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3571 ||:  94%|#########3| 103/110 [01:09<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3576 ||:  95%|#########4| 104/110 [01:09<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3575 ||:  95%|#########5| 105/110 [01:10<00:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3579 ||:  96%|#########6| 106/110 [01:11<00:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3588 ||:  97%|#########7| 107/110 [01:12<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3588 ||:  98%|#########8| 108/110 [01:12<00:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3587 ||:  99%|#########9| 109/110 [01:13<00:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3589 ||: 100%|##########| 110/110 [01:13<00:00,  1.76it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3589 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6435, acc: 0.7500, no_result: 0.0312, loss: 0.6363 ||:   4%|4         | 1/24 [00:00<00:08,  2.81it/s]
+BLEU: 0.5897, acc: 0.7031, no_result: 0.0156, loss: 0.6188 ||:   8%|8         | 2/24 [00:00<00:07,  2.89it/s]
+BLEU: 0.5881, acc: 0.6771, no_result: 0.0729, loss: 0.6875 ||:  12%|#2        | 3/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.6152, acc: 0.6641, no_result: 0.0703, loss: 0.6799 ||:  17%|#6        | 4/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.6498, acc: 0.6687, no_result: 0.0813, loss: 0.6577 ||:  21%|##        | 5/24 [00:01<00:06,  2.73it/s]
+BLEU: 0.6423, acc: 0.6406, no_result: 0.0833, loss: 0.6934 ||:  25%|##5       | 6/24 [00:02<00:06,  2.86it/s]
+BLEU: 0.6465, acc: 0.6518, no_result: 0.0804, loss: 0.6807 ||:  29%|##9       | 7/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.6427, acc: 0.6680, no_result: 0.0781, loss: 0.6910 ||:  33%|###3      | 8/24 [00:02<00:05,  3.02it/s]
+BLEU: 0.6347, acc: 0.6806, no_result: 0.0729, loss: 0.6903 ||:  38%|###7      | 9/24 [00:03<00:05,  2.98it/s]
+BLEU: 0.6456, acc: 0.6750, no_result: 0.0688, loss: 0.6995 ||:  42%|####1     | 10/24 [00:03<00:04,  2.93it/s]
+BLEU: 0.6420, acc: 0.6648, no_result: 0.0739, loss: 0.7227 ||:  46%|####5     | 11/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.6373, acc: 0.6667, no_result: 0.0677, loss: 0.7185 ||:  50%|#####     | 12/24 [00:04<00:04,  2.90it/s]
+BLEU: 0.6247, acc: 0.6635, no_result: 0.0745, loss: 0.7309 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.78it/s]
+BLEU: 0.6274, acc: 0.6540, no_result: 0.0781, loss: 0.7495 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.69it/s]
+BLEU: 0.6058, acc: 0.6312, no_result: 0.0958, loss: 0.7974 ||:  62%|######2   | 15/24 [00:05<00:03,  2.49it/s]
+BLEU: 0.5916, acc: 0.6133, no_result: 0.1035, loss: 0.8177 ||:  67%|######6   | 16/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.5962, acc: 0.6121, no_result: 0.1103, loss: 0.8149 ||:  71%|#######   | 17/24 [00:06<00:02,  2.51it/s]
+BLEU: 0.5985, acc: 0.6076, no_result: 0.1163, loss: 0.8209 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6027, acc: 0.6151, no_result: 0.1135, loss: 0.8046 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.59it/s]
+BLEU: 0.6018, acc: 0.6172, no_result: 0.1109, loss: 0.8053 ||:  83%|########3 | 20/24 [00:07<00:01,  2.72it/s]
+BLEU: 0.5980, acc: 0.6116, no_result: 0.1086, loss: 0.8045 ||:  88%|########7 | 21/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.6018, acc: 0.6023, no_result: 0.1094, loss: 0.8011 ||:  92%|#########1| 22/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.6057, acc: 0.6101, no_result: 0.1046, loss: 0.7892 ||:  96%|#########5| 23/24 [00:08<00:00,  2.78it/s]
+BLEU: 0.6044, acc: 0.5985, no_result: 0.1095, loss: 0.8029 ||: 100%|##########| 24/24 [00:08<00:00,  3.44it/s]
+BLEU: 0.6044, acc: 0.5985, no_result: 0.1095, loss: 0.8029 ||: 100%|##########| 24/24 [00:08<00:00,  2.82it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3847 ||:   1%|          | 1/110 [00:00<01:10,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3359 ||:   2%|1         | 2/110 [00:01<01:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3289 ||:   3%|2         | 3/110 [00:01<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3224 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3378 ||:   5%|4         | 5/110 [00:03<01:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3446 ||:   5%|5         | 6/110 [00:03<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3400 ||:   6%|6         | 7/110 [00:04<01:05,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3401 ||:   7%|7         | 8/110 [00:05<01:05,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3255 ||:   8%|8         | 9/110 [00:05<01:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3283 ||:   9%|9         | 10/110 [00:06<01:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3206 ||:  10%|#         | 11/110 [00:07<01:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3230 ||:  11%|#         | 12/110 [00:07<01:01,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3243 ||:  12%|#1        | 13/110 [00:08<01:02,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3265 ||:  13%|#2        | 14/110 [00:08<01:00,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3280 ||:  14%|#3        | 15/110 [00:09<01:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3281 ||:  15%|#4        | 16/110 [00:10<01:00,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3241 ||:  15%|#5        | 17/110 [00:11<01:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3254 ||:  16%|#6        | 18/110 [00:11<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3311 ||:  17%|#7        | 19/110 [00:12<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3320 ||:  18%|#8        | 20/110 [00:13<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3327 ||:  19%|#9        | 21/110 [00:13<00:57,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3301 ||:  20%|##        | 22/110 [00:14<00:55,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3319 ||:  21%|##        | 23/110 [00:14<00:55,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3309 ||:  22%|##1       | 24/110 [00:15<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3334 ||:  23%|##2       | 25/110 [00:16<00:59,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3336 ||:  24%|##3       | 26/110 [00:17<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3369 ||:  25%|##4       | 27/110 [00:17<00:57,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3380 ||:  25%|##5       | 28/110 [00:18<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3369 ||:  26%|##6       | 29/110 [00:19<00:54,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3367 ||:  27%|##7       | 30/110 [00:19<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3340 ||:  28%|##8       | 31/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3352 ||:  29%|##9       | 32/110 [00:20<00:50,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3344 ||:  30%|###       | 33/110 [00:21<00:49,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3324 ||:  31%|###       | 34/110 [00:22<00:48,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3337 ||:  32%|###1      | 35/110 [00:22<00:48,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3335 ||:  33%|###2      | 36/110 [00:23<00:49,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3351 ||:  34%|###3      | 37/110 [00:24<00:46,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3356 ||:  35%|###4      | 38/110 [00:24<00:46,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3340 ||:  35%|###5      | 39/110 [00:25<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3357 ||:  36%|###6      | 40/110 [00:26<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3387 ||:  37%|###7      | 41/110 [00:26<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3376 ||:  38%|###8      | 42/110 [00:27<00:43,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3381 ||:  39%|###9      | 43/110 [00:28<00:43,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3388 ||:  40%|####      | 44/110 [00:28<00:41,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3384 ||:  41%|####      | 45/110 [00:29<00:40,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3387 ||:  42%|####1     | 46/110 [00:29<00:39,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3386 ||:  43%|####2     | 47/110 [00:30<00:44,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3385 ||:  44%|####3     | 48/110 [00:31<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3378 ||:  45%|####4     | 49/110 [00:32<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3373 ||:  45%|####5     | 50/110 [00:33<00:54,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3383 ||:  46%|####6     | 51/110 [00:34<00:48,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3379 ||:  47%|####7     | 52/110 [00:35<00:48,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3376 ||:  48%|####8     | 53/110 [00:35<00:45,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3381 ||:  49%|####9     | 54/110 [00:36<00:44,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3388 ||:  50%|#####     | 55/110 [00:37<00:42,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3380 ||:  51%|#####     | 56/110 [00:37<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3393 ||:  52%|#####1    | 57/110 [00:38<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3391 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3402 ||:  54%|#####3    | 59/110 [00:39<00:35,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3388 ||:  55%|#####4    | 60/110 [00:40<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3388 ||:  55%|#####5    | 61/110 [00:41<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3405 ||:  56%|#####6    | 62/110 [00:42<00:33,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3411 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3404 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3408 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3415 ||:  60%|######    | 66/110 [00:44<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3419 ||:  61%|######    | 67/110 [00:45<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3419 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3424 ||:  63%|######2   | 69/110 [00:46<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3419 ||:  64%|######3   | 70/110 [00:47<00:26,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3424 ||:  65%|######4   | 71/110 [00:48<00:26,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3443 ||:  65%|######5   | 72/110 [00:48<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3450 ||:  66%|######6   | 73/110 [00:49<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3459 ||:  67%|######7   | 74/110 [00:50<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3447 ||:  68%|######8   | 75/110 [00:50<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3441 ||:  69%|######9   | 76/110 [00:51<00:22,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3441 ||:  70%|#######   | 77/110 [00:52<00:21,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3444 ||:  71%|#######   | 78/110 [00:52<00:20,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3452 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3445 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3440 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3433 ||:  75%|#######4  | 82/110 [00:55<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3430 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3423 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3417 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3409 ||:  78%|#######8  | 86/110 [00:58<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3421 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3416 ||:  80%|########  | 88/110 [00:59<00:14,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3427 ||:  81%|########  | 89/110 [01:00<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3420 ||:  82%|########1 | 90/110 [01:00<00:12,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3412 ||:  83%|########2 | 91/110 [01:01<00:11,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3407 ||:  84%|########3 | 92/110 [01:02<00:11,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3417 ||:  85%|########4 | 93/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3428 ||:  85%|########5 | 94/110 [01:03<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3422 ||:  86%|########6 | 95/110 [01:04<00:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3425 ||:  87%|########7 | 96/110 [01:04<00:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3430 ||:  88%|########8 | 97/110 [01:05<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3438 ||:  89%|########9 | 98/110 [01:06<00:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3439 ||:  90%|######### | 99/110 [01:06<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3437 ||:  91%|######### | 100/110 [01:07<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3429 ||:  92%|#########1| 101/110 [01:07<00:05,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3439 ||:  93%|#########2| 102/110 [01:08<00:04,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3435 ||:  94%|#########3| 103/110 [01:09<00:04,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3436 ||:  95%|#########4| 104/110 [01:09<00:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3437 ||:  95%|#########5| 105/110 [01:10<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3439 ||:  96%|#########6| 106/110 [01:11<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3435 ||:  97%|#########7| 107/110 [01:11<00:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3433 ||:  98%|#########8| 108/110 [01:12<00:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3431 ||:  99%|#########9| 109/110 [01:13<00:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3414 ||: 100%|##########| 110/110 [01:13<00:00,  1.78it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3414 ||: 100%|##########| 110/110 [01:13<00:00,  1.50it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6605, acc: 0.7188, no_result: 0.1250, loss: 0.5946 ||:   4%|4         | 1/24 [00:00<00:08,  2.81it/s]
+BLEU: 0.5741, acc: 0.7500, no_result: 0.0781, loss: 0.6335 ||:   8%|8         | 2/24 [00:00<00:07,  2.87it/s]
+BLEU: 0.5681, acc: 0.6875, no_result: 0.1250, loss: 0.7437 ||:  12%|#2        | 3/24 [00:01<00:07,  2.88it/s]
+BLEU: 0.5977, acc: 0.6797, no_result: 0.1172, loss: 0.7117 ||:  17%|#6        | 4/24 [00:01<00:07,  2.80it/s]
+BLEU: 0.6398, acc: 0.6937, no_result: 0.1062, loss: 0.6855 ||:  21%|##        | 5/24 [00:01<00:06,  2.76it/s]
+BLEU: 0.6351, acc: 0.6615, no_result: 0.1042, loss: 0.7019 ||:  25%|##5       | 6/24 [00:02<00:06,  2.88it/s]
+BLEU: 0.6421, acc: 0.6696, no_result: 0.0982, loss: 0.6915 ||:  29%|##9       | 7/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6390, acc: 0.6758, no_result: 0.0977, loss: 0.7050 ||:  33%|###3      | 8/24 [00:02<00:05,  3.02it/s]
+BLEU: 0.6288, acc: 0.6771, no_result: 0.0938, loss: 0.7013 ||:  38%|###7      | 9/24 [00:03<00:05,  3.00it/s]
+BLEU: 0.6375, acc: 0.6781, no_result: 0.0844, loss: 0.7101 ||:  42%|####1     | 10/24 [00:03<00:04,  2.93it/s]
+BLEU: 0.6412, acc: 0.6562, no_result: 0.0881, loss: 0.7330 ||:  46%|####5     | 11/24 [00:03<00:04,  2.80it/s]
+BLEU: 0.6346, acc: 0.6510, no_result: 0.0859, loss: 0.7343 ||:  50%|#####     | 12/24 [00:04<00:04,  2.90it/s]
+BLEU: 0.6227, acc: 0.6466, no_result: 0.0913, loss: 0.7574 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.78it/s]
+BLEU: 0.6232, acc: 0.6317, no_result: 0.0982, loss: 0.7797 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.68it/s]
+BLEU: 0.6014, acc: 0.6083, no_result: 0.1125, loss: 0.8219 ||:  62%|######2   | 15/24 [00:05<00:03,  2.49it/s]
+BLEU: 0.5874, acc: 0.5957, no_result: 0.1230, loss: 0.8385 ||:  67%|######6   | 16/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.5890, acc: 0.5919, no_result: 0.1287, loss: 0.8354 ||:  71%|#######   | 17/24 [00:06<00:02,  2.50it/s]
+BLEU: 0.5881, acc: 0.5885, no_result: 0.1285, loss: 0.8439 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.50it/s]
+BLEU: 0.5945, acc: 0.5970, no_result: 0.1250, loss: 0.8260 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.59it/s]
+BLEU: 0.5953, acc: 0.6000, no_result: 0.1203, loss: 0.8237 ||:  83%|########3 | 20/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.5908, acc: 0.5982, no_result: 0.1190, loss: 0.8222 ||:  88%|########7 | 21/24 [00:07<00:01,  2.71it/s]
+BLEU: 0.5962, acc: 0.6009, no_result: 0.1165, loss: 0.8159 ||:  92%|#########1| 22/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.5991, acc: 0.6073, no_result: 0.1141, loss: 0.8010 ||:  96%|#########5| 23/24 [00:08<00:00,  2.79it/s]
+BLEU: 0.5988, acc: 0.6005, no_result: 0.1186, loss: 0.8074 ||: 100%|##########| 24/24 [00:08<00:00,  3.42it/s]
+BLEU: 0.5988, acc: 0.6005, no_result: 0.1186, loss: 0.8074 ||: 100%|##########| 24/24 [00:08<00:00,  2.82it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3327 ||:   1%|          | 1/110 [00:00<01:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3041 ||:   2%|1         | 2/110 [00:01<01:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2850 ||:   3%|2         | 3/110 [00:02<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3131 ||:   4%|3         | 4/110 [00:02<01:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2972 ||:   5%|4         | 5/110 [00:03<01:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2952 ||:   5%|5         | 6/110 [00:04<01:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3001 ||:   6%|6         | 7/110 [00:04<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2981 ||:   7%|7         | 8/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3002 ||:   8%|8         | 9/110 [00:05<01:04,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2989 ||:   9%|9         | 10/110 [00:06<01:04,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2935 ||:  10%|#         | 11/110 [00:07<01:03,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2923 ||:  11%|#         | 12/110 [00:07<00:59,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2939 ||:  12%|#1        | 13/110 [00:08<01:02,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2899 ||:  13%|#2        | 14/110 [00:09<01:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2857 ||:  14%|#3        | 15/110 [00:09<01:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2883 ||:  15%|#4        | 16/110 [00:10<01:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2881 ||:  15%|#5        | 17/110 [00:11<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2955 ||:  16%|#6        | 18/110 [00:11<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2978 ||:  17%|#7        | 19/110 [00:12<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2995 ||:  18%|#8        | 20/110 [00:13<01:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2991 ||:  19%|#9        | 21/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2976 ||:  20%|##        | 22/110 [00:14<01:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2964 ||:  21%|##        | 23/110 [00:15<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2987 ||:  22%|##1       | 24/110 [00:15<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3000 ||:  23%|##2       | 25/110 [00:16<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3036 ||:  24%|##3       | 26/110 [00:17<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3038 ||:  25%|##4       | 27/110 [00:17<00:55,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3022 ||:  25%|##5       | 28/110 [00:18<00:52,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3049 ||:  26%|##6       | 29/110 [00:19<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3055 ||:  27%|##7       | 30/110 [00:19<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3073 ||:  28%|##8       | 31/110 [00:20<00:49,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3080 ||:  29%|##9       | 32/110 [00:20<00:48,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3068 ||:  30%|###       | 33/110 [00:21<00:48,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3085 ||:  31%|###       | 34/110 [00:22<00:48,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3084 ||:  32%|###1      | 35/110 [00:22<00:47,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3095 ||:  33%|###2      | 36/110 [00:23<00:46,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3087 ||:  34%|###3      | 37/110 [00:24<00:45,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3086 ||:  35%|###4      | 38/110 [00:24<00:44,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3097 ||:  35%|###5      | 39/110 [00:25<00:47,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3108 ||:  36%|###6      | 40/110 [00:26<01:02,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3119 ||:  37%|###7      | 41/110 [00:27<00:56,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3140 ||:  38%|###8      | 42/110 [00:28<00:51,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3149 ||:  39%|###9      | 43/110 [00:28<00:48,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3161 ||:  40%|####      | 44/110 [00:29<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3157 ||:  41%|####      | 45/110 [00:30<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3145 ||:  42%|####1     | 46/110 [00:30<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3138 ||:  43%|####2     | 47/110 [00:31<00:44,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3136 ||:  44%|####3     | 48/110 [00:32<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3137 ||:  45%|####4     | 49/110 [00:32<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3128 ||:  45%|####5     | 50/110 [00:33<00:38,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3139 ||:  46%|####6     | 51/110 [00:34<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3138 ||:  47%|####7     | 52/110 [00:34<00:37,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3144 ||:  48%|####8     | 53/110 [00:35<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3149 ||:  49%|####9     | 54/110 [00:36<00:38,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3158 ||:  50%|#####     | 55/110 [00:36<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3174 ||:  51%|#####     | 56/110 [00:37<00:38,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3167 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3169 ||:  53%|#####2    | 58/110 [00:38<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3165 ||:  54%|#####3    | 59/110 [00:39<00:38,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3161 ||:  55%|#####4    | 60/110 [00:40<00:38,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3164 ||:  55%|#####5    | 61/110 [00:41<00:37,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3157 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3160 ||:  57%|#####7    | 63/110 [00:42<00:34,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3165 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3164 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3165 ||:  60%|######    | 66/110 [00:44<00:29,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3153 ||:  61%|######    | 67/110 [00:45<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3161 ||:  62%|######1   | 68/110 [00:46<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3166 ||:  63%|######2   | 69/110 [00:46<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3164 ||:  64%|######3   | 70/110 [00:47<00:26,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3155 ||:  65%|######4   | 71/110 [00:48<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3166 ||:  65%|######5   | 72/110 [00:48<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3153 ||:  66%|######6   | 73/110 [00:49<00:23,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3147 ||:  67%|######7   | 74/110 [00:49<00:23,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3167 ||:  68%|######8   | 75/110 [00:50<00:22,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3163 ||:  69%|######9   | 76/110 [00:51<00:21,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3167 ||:  70%|#######   | 77/110 [00:52<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3156 ||:  71%|#######   | 78/110 [00:52<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3153 ||:  72%|#######1  | 79/110 [00:53<00:22,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3154 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3153 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3152 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3148 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3153 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3149 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3147 ||:  78%|#######8  | 86/110 [00:57<00:15,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3151 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3150 ||:  80%|########  | 88/110 [00:59<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3159 ||:  81%|########  | 89/110 [01:00<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3166 ||:  82%|########1 | 90/110 [01:00<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3162 ||:  83%|########2 | 91/110 [01:01<00:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3170 ||:  84%|########3 | 92/110 [01:02<00:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3175 ||:  85%|########4 | 93/110 [01:02<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3178 ||:  85%|########5 | 94/110 [01:03<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3172 ||:  86%|########6 | 95/110 [01:04<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3170 ||:  87%|########7 | 96/110 [01:04<00:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3175 ||:  88%|########8 | 97/110 [01:05<00:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3186 ||:  89%|########9 | 98/110 [01:06<00:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3189 ||:  90%|######### | 99/110 [01:06<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3184 ||:  91%|######### | 100/110 [01:07<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3184 ||:  92%|#########1| 101/110 [01:08<00:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3187 ||:  93%|#########2| 102/110 [01:08<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3193 ||:  94%|#########3| 103/110 [01:09<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3194 ||:  95%|#########4| 104/110 [01:10<00:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3192 ||:  95%|#########5| 105/110 [01:10<00:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3193 ||:  96%|#########6| 106/110 [01:11<00:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3190 ||:  97%|#########7| 107/110 [01:11<00:01,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3200 ||:  98%|#########8| 108/110 [01:12<00:01,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3201 ||:  99%|#########9| 109/110 [01:13<00:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3194 ||: 100%|##########| 110/110 [01:13<00:00,  1.84it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3194 ||: 100%|##########| 110/110 [01:13<00:00,  1.50it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6675, acc: 0.6875, no_result: 0.1250, loss: 0.6424 ||:   4%|4         | 1/24 [00:00<00:08,  2.76it/s]
+BLEU: 0.6112, acc: 0.7031, no_result: 0.0781, loss: 0.6286 ||:   8%|8         | 2/24 [00:00<00:07,  2.86it/s]
+BLEU: 0.5769, acc: 0.6562, no_result: 0.1146, loss: 0.7310 ||:  12%|#2        | 3/24 [00:01<00:07,  2.88it/s]
+BLEU: 0.6039, acc: 0.6484, no_result: 0.1016, loss: 0.7016 ||:  17%|#6        | 4/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.6337, acc: 0.6625, no_result: 0.0938, loss: 0.6764 ||:  21%|##        | 5/24 [00:01<00:06,  2.79it/s]
+BLEU: 0.6242, acc: 0.6354, no_result: 0.0885, loss: 0.6938 ||:  25%|##5       | 6/24 [00:02<00:06,  2.93it/s]
+BLEU: 0.6278, acc: 0.6473, no_result: 0.0893, loss: 0.6893 ||:  29%|##9       | 7/24 [00:02<00:05,  3.02it/s]
+BLEU: 0.6163, acc: 0.6484, no_result: 0.0898, loss: 0.6915 ||:  33%|###3      | 8/24 [00:02<00:05,  3.05it/s]
+BLEU: 0.6118, acc: 0.6528, no_result: 0.0868, loss: 0.6923 ||:  38%|###7      | 9/24 [00:03<00:04,  3.03it/s]
+BLEU: 0.6314, acc: 0.6594, no_result: 0.0813, loss: 0.7049 ||:  42%|####1     | 10/24 [00:03<00:04,  2.95it/s]
+BLEU: 0.6355, acc: 0.6392, no_result: 0.0881, loss: 0.7372 ||:  46%|####5     | 11/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.6325, acc: 0.6458, no_result: 0.0833, loss: 0.7350 ||:  50%|#####     | 12/24 [00:04<00:04,  2.91it/s]
+BLEU: 0.6276, acc: 0.6442, no_result: 0.0865, loss: 0.7553 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.80it/s]
+BLEU: 0.6294, acc: 0.6295, no_result: 0.0893, loss: 0.7763 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.71it/s]
+BLEU: 0.6077, acc: 0.6125, no_result: 0.1021, loss: 0.8246 ||:  62%|######2   | 15/24 [00:05<00:03,  2.52it/s]
+BLEU: 0.6013, acc: 0.6016, no_result: 0.1113, loss: 0.8446 ||:  67%|######6   | 16/24 [00:05<00:03,  2.47it/s]
+BLEU: 0.6036, acc: 0.5974, no_result: 0.1195, loss: 0.8414 ||:  71%|#######   | 17/24 [00:06<00:02,  2.53it/s]
+BLEU: 0.6038, acc: 0.5955, no_result: 0.1198, loss: 0.8526 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.51it/s]
+BLEU: 0.6068, acc: 0.6003, no_result: 0.1168, loss: 0.8372 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.60it/s]
+BLEU: 0.6092, acc: 0.6031, no_result: 0.1125, loss: 0.8350 ||:  83%|########3 | 20/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.6066, acc: 0.5982, no_result: 0.1101, loss: 0.8336 ||:  88%|########7 | 21/24 [00:07<00:01,  2.71it/s]
+BLEU: 0.6130, acc: 0.5952, no_result: 0.1108, loss: 0.8297 ||:  92%|#########1| 22/24 [00:08<00:00,  2.68it/s]
+BLEU: 0.6173, acc: 0.6033, no_result: 0.1060, loss: 0.8168 ||:  96%|#########5| 23/24 [00:08<00:00,  2.79it/s]
+BLEU: 0.6176, acc: 0.5966, no_result: 0.1108, loss: 0.8278 ||: 100%|##########| 24/24 [00:08<00:00,  3.44it/s]
+BLEU: 0.6176, acc: 0.5966, no_result: 0.1108, loss: 0.8278 ||: 100%|##########| 24/24 [00:08<00:00,  2.84it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2711 ||:   1%|          | 1/110 [00:00<01:18,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2775 ||:   2%|1         | 2/110 [00:01<01:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2683 ||:   3%|2         | 3/110 [00:02<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2809 ||:   4%|3         | 4/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2786 ||:   5%|4         | 5/110 [00:03<01:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2870 ||:   5%|5         | 6/110 [00:04<01:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2883 ||:   6%|6         | 7/110 [00:04<01:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2891 ||:   7%|7         | 8/110 [00:05<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2895 ||:   8%|8         | 9/110 [00:06<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2914 ||:   9%|9         | 10/110 [00:07<01:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2971 ||:  10%|#         | 11/110 [00:07<01:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2900 ||:  11%|#         | 12/110 [00:08<01:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2893 ||:  12%|#1        | 13/110 [00:08<01:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2876 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2887 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2880 ||:  15%|#4        | 16/110 [00:11<01:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2845 ||:  15%|#5        | 17/110 [00:11<01:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2872 ||:  16%|#6        | 18/110 [00:12<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2867 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2895 ||:  18%|#8        | 20/110 [00:13<00:59,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2875 ||:  19%|#9        | 21/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2863 ||:  20%|##        | 22/110 [00:15<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2875 ||:  21%|##        | 23/110 [00:16<01:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2877 ||:  22%|##1       | 24/110 [00:16<01:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2917 ||:  23%|##2       | 25/110 [00:17<00:59,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2914 ||:  24%|##3       | 26/110 [00:17<00:56,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2945 ||:  25%|##4       | 27/110 [00:18<00:53,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2921 ||:  25%|##5       | 28/110 [00:19<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2887 ||:  26%|##6       | 29/110 [00:19<00:50,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2876 ||:  27%|##7       | 30/110 [00:21<01:08,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2867 ||:  28%|##8       | 31/110 [00:21<01:02,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2861 ||:  29%|##9       | 32/110 [00:22<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2838 ||:  30%|###       | 33/110 [00:22<00:52,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2830 ||:  31%|###       | 34/110 [00:23<00:51,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2829 ||:  32%|###1      | 35/110 [00:24<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2835 ||:  33%|###2      | 36/110 [00:24<00:48,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2844 ||:  34%|###3      | 37/110 [00:25<00:46,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2845 ||:  35%|###4      | 38/110 [00:26<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2851 ||:  35%|###5      | 39/110 [00:26<00:47,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2841 ||:  36%|###6      | 40/110 [00:27<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2870 ||:  37%|###7      | 41/110 [00:28<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2856 ||:  38%|###8      | 42/110 [00:28<00:42,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2848 ||:  39%|###9      | 43/110 [00:29<00:43,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2852 ||:  40%|####      | 44/110 [00:30<00:42,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2838 ||:  41%|####      | 45/110 [00:30<00:43,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2840 ||:  42%|####1     | 46/110 [00:31<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2837 ||:  43%|####2     | 47/110 [00:32<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2841 ||:  44%|####3     | 48/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2839 ||:  45%|####4     | 49/110 [00:33<00:41,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2840 ||:  45%|####5     | 50/110 [00:34<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2855 ||:  46%|####6     | 51/110 [00:34<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2854 ||:  47%|####7     | 52/110 [00:35<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2859 ||:  48%|####8     | 53/110 [00:36<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2864 ||:  49%|####9     | 54/110 [00:36<00:39,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2864 ||:  50%|#####     | 55/110 [00:37<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2867 ||:  51%|#####     | 56/110 [00:38<00:38,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2864 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2872 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2867 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2862 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2857 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2862 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2871 ||:  57%|#####7    | 63/110 [00:42<00:29,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2876 ||:  58%|#####8    | 64/110 [00:43<00:29,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2889 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2885 ||:  60%|######    | 66/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2879 ||:  61%|######    | 67/110 [00:45<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2889 ||:  62%|######1   | 68/110 [00:46<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2896 ||:  63%|######2   | 69/110 [00:46<00:26,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2911 ||:  64%|######3   | 70/110 [00:47<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2922 ||:  65%|######4   | 71/110 [00:48<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2937 ||:  65%|######5   | 72/110 [00:48<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2940 ||:  66%|######6   | 73/110 [00:49<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2933 ||:  67%|######7   | 74/110 [00:50<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2924 ||:  68%|######8   | 75/110 [00:50<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2919 ||:  69%|######9   | 76/110 [00:51<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2935 ||:  70%|#######   | 77/110 [00:52<00:23,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2927 ||:  71%|#######   | 78/110 [00:52<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2937 ||:  72%|#######1  | 79/110 [00:53<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2940 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2938 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2944 ||:  75%|#######4  | 82/110 [00:55<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2949 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2951 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2954 ||:  77%|#######7  | 85/110 [00:57<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2961 ||:  78%|#######8  | 86/110 [00:58<00:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2957 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2957 ||:  80%|########  | 88/110 [01:00<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2955 ||:  81%|########  | 89/110 [01:00<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2957 ||:  82%|########1 | 90/110 [01:01<00:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2951 ||:  83%|########2 | 91/110 [01:01<00:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2962 ||:  84%|########3 | 92/110 [01:02<00:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2961 ||:  85%|########4 | 93/110 [01:03<00:10,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2961 ||:  85%|########5 | 94/110 [01:03<00:10,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2968 ||:  86%|########6 | 95/110 [01:04<00:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2981 ||:  87%|########7 | 96/110 [01:05<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2981 ||:  88%|########8 | 97/110 [01:05<00:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2992 ||:  89%|########9 | 98/110 [01:06<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3001 ||:  90%|######### | 99/110 [01:07<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3000 ||:  91%|######### | 100/110 [01:07<00:06,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3001 ||:  92%|#########1| 101/110 [01:08<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3000 ||:  93%|#########2| 102/110 [01:09<00:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3005 ||:  94%|#########3| 103/110 [01:09<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3002 ||:  95%|#########4| 104/110 [01:10<00:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3004 ||:  95%|#########5| 105/110 [01:11<00:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3015 ||:  96%|#########6| 106/110 [01:11<00:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3025 ||:  97%|#########7| 107/110 [01:12<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3037 ||:  98%|#########8| 108/110 [01:13<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3038 ||:  99%|#########9| 109/110 [01:13<00:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3039 ||: 100%|##########| 110/110 [01:14<00:00,  1.67it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3039 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6851, acc: 0.5938, no_result: 0.0312, loss: 0.7497 ||:   4%|4         | 1/24 [00:00<00:08,  2.78it/s]
+BLEU: 0.5995, acc: 0.6875, no_result: 0.0469, loss: 0.7259 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.5926, acc: 0.6771, no_result: 0.0729, loss: 0.7892 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.6342, acc: 0.6797, no_result: 0.0703, loss: 0.7544 ||:  17%|#6        | 4/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.6702, acc: 0.6937, no_result: 0.0688, loss: 0.7172 ||:  21%|##        | 5/24 [00:01<00:06,  2.75it/s]
+BLEU: 0.6482, acc: 0.6667, no_result: 0.0729, loss: 0.7316 ||:  25%|##5       | 6/24 [00:02<00:06,  2.86it/s]
+BLEU: 0.6474, acc: 0.6741, no_result: 0.0714, loss: 0.7251 ||:  29%|##9       | 7/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6429, acc: 0.6719, no_result: 0.0742, loss: 0.7394 ||:  33%|###3      | 8/24 [00:02<00:05,  3.01it/s]
+BLEU: 0.6303, acc: 0.6771, no_result: 0.0729, loss: 0.7323 ||:  38%|###7      | 9/24 [00:03<00:05,  2.97it/s]
+BLEU: 0.6469, acc: 0.6750, no_result: 0.0656, loss: 0.7415 ||:  42%|####1     | 10/24 [00:03<00:04,  2.89it/s]
+BLEU: 0.6474, acc: 0.6506, no_result: 0.0767, loss: 0.7706 ||:  46%|####5     | 11/24 [00:03<00:04,  2.75it/s]
+BLEU: 0.6387, acc: 0.6458, no_result: 0.0729, loss: 0.7843 ||:  50%|#####     | 12/24 [00:04<00:04,  2.84it/s]
+BLEU: 0.6352, acc: 0.6466, no_result: 0.0817, loss: 0.8075 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.6391, acc: 0.6339, no_result: 0.0893, loss: 0.8279 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.6274, acc: 0.6188, no_result: 0.1104, loss: 0.8732 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.6202, acc: 0.6074, no_result: 0.1230, loss: 0.8853 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.6254, acc: 0.6011, no_result: 0.1305, loss: 0.8824 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.6232, acc: 0.5990, no_result: 0.1354, loss: 0.8904 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6253, acc: 0.6053, no_result: 0.1332, loss: 0.8723 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.6252, acc: 0.6094, no_result: 0.1297, loss: 0.8680 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6183, acc: 0.6042, no_result: 0.1250, loss: 0.8647 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6223, acc: 0.6009, no_result: 0.1236, loss: 0.8577 ||:  92%|#########1| 22/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.6252, acc: 0.6101, no_result: 0.1209, loss: 0.8441 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.6247, acc: 0.6032, no_result: 0.1251, loss: 0.8511 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.6247, acc: 0.6032, no_result: 0.1251, loss: 0.8511 ||: 100%|##########| 24/24 [00:08<00:00,  2.78it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2467 ||:   1%|          | 1/110 [00:00<01:10,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2427 ||:   2%|1         | 2/110 [00:01<01:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2559 ||:   3%|2         | 3/110 [00:01<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2730 ||:   4%|3         | 4/110 [00:02<01:07,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2707 ||:   5%|4         | 5/110 [00:03<01:05,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2736 ||:   5%|5         | 6/110 [00:03<01:06,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2754 ||:   6%|6         | 7/110 [00:04<01:04,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2744 ||:   7%|7         | 8/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2746 ||:   8%|8         | 9/110 [00:05<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2698 ||:   9%|9         | 10/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2752 ||:  10%|#         | 11/110 [00:07<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2767 ||:  11%|#         | 12/110 [00:07<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2787 ||:  12%|#1        | 13/110 [00:08<01:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2814 ||:  13%|#2        | 14/110 [00:09<01:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2821 ||:  14%|#3        | 15/110 [00:09<01:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2858 ||:  15%|#4        | 16/110 [00:10<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2847 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2901 ||:  16%|#6        | 18/110 [00:11<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2851 ||:  17%|#7        | 19/110 [00:12<00:59,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2850 ||:  18%|#8        | 20/110 [00:13<01:23,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2839 ||:  19%|#9        | 21/110 [00:14<01:15,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2819 ||:  20%|##        | 22/110 [00:15<01:14,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2781 ||:  21%|##        | 23/110 [00:16<01:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2812 ||:  22%|##1       | 24/110 [00:16<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2792 ||:  23%|##2       | 25/110 [00:17<01:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2811 ||:  24%|##3       | 26/110 [00:18<01:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2803 ||:  25%|##4       | 27/110 [00:18<01:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2806 ||:  25%|##5       | 28/110 [00:19<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2800 ||:  26%|##6       | 29/110 [00:20<00:55,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2780 ||:  27%|##7       | 30/110 [00:20<00:53,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2815 ||:  28%|##8       | 31/110 [00:21<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2828 ||:  29%|##9       | 32/110 [00:22<00:51,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2816 ||:  30%|###       | 33/110 [00:22<00:50,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2808 ||:  31%|###       | 34/110 [00:23<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2802 ||:  32%|###1      | 35/110 [00:24<00:54,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2817 ||:  33%|###2      | 36/110 [00:25<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2825 ||:  34%|###3      | 37/110 [00:25<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2832 ||:  35%|###4      | 38/110 [00:26<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2831 ||:  35%|###5      | 39/110 [00:27<00:51,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2826 ||:  36%|###6      | 40/110 [00:27<00:49,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2829 ||:  37%|###7      | 41/110 [00:28<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2817 ||:  38%|###8      | 42/110 [00:29<00:47,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2813 ||:  39%|###9      | 43/110 [00:29<00:47,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2802 ||:  40%|####      | 44/110 [00:30<00:44,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2796 ||:  41%|####      | 45/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2804 ||:  42%|####1     | 46/110 [00:32<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2807 ||:  43%|####2     | 47/110 [00:32<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2797 ||:  44%|####3     | 48/110 [00:33<00:40,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2800 ||:  45%|####4     | 49/110 [00:33<00:39,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2802 ||:  45%|####5     | 50/110 [00:34<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2798 ||:  46%|####6     | 51/110 [00:35<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2801 ||:  47%|####7     | 52/110 [00:36<00:40,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2800 ||:  48%|####8     | 53/110 [00:36<00:39,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2815 ||:  49%|####9     | 54/110 [00:37<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2806 ||:  50%|#####     | 55/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2805 ||:  51%|#####     | 56/110 [00:38<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2817 ||:  52%|#####1    | 57/110 [00:39<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2812 ||:  53%|#####2    | 58/110 [00:40<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2820 ||:  54%|#####3    | 59/110 [00:40<00:32,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2821 ||:  55%|#####4    | 60/110 [00:41<00:32,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2815 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2810 ||:  56%|#####6    | 62/110 [00:42<00:30,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2813 ||:  57%|#####7    | 63/110 [00:43<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2821 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2807 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2800 ||:  60%|######    | 66/110 [00:45<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2802 ||:  61%|######    | 67/110 [00:46<00:28,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2808 ||:  62%|######1   | 68/110 [00:46<00:27,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2815 ||:  63%|######2   | 69/110 [00:47<00:26,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2817 ||:  64%|######3   | 70/110 [00:48<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2824 ||:  65%|######4   | 71/110 [00:48<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2815 ||:  65%|######5   | 72/110 [00:49<00:25,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2819 ||:  66%|######6   | 73/110 [00:50<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2817 ||:  67%|######7   | 74/110 [00:50<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2838 ||:  68%|######8   | 75/110 [00:51<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2836 ||:  69%|######9   | 76/110 [00:52<00:23,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2848 ||:  70%|#######   | 77/110 [00:52<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2854 ||:  71%|#######   | 78/110 [00:53<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2860 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2867 ||:  73%|#######2  | 80/110 [00:54<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2874 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2866 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2868 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2864 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2862 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2861 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2872 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2873 ||:  80%|########  | 88/110 [01:00<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2873 ||:  81%|########  | 89/110 [01:00<00:13,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2876 ||:  82%|########1 | 90/110 [01:01<00:12,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2881 ||:  83%|########2 | 91/110 [01:02<00:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2874 ||:  84%|########3 | 92/110 [01:02<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2873 ||:  85%|########4 | 93/110 [01:03<00:12,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2873 ||:  85%|########5 | 94/110 [01:04<00:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2869 ||:  86%|########6 | 95/110 [01:05<00:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2868 ||:  87%|########7 | 96/110 [01:05<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2866 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2875 ||:  89%|########9 | 98/110 [01:07<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2878 ||:  90%|######### | 99/110 [01:07<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2876 ||:  91%|######### | 100/110 [01:08<00:06,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2874 ||:  92%|#########1| 101/110 [01:09<00:05,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2878 ||:  93%|#########2| 102/110 [01:09<00:05,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2879 ||:  94%|#########3| 103/110 [01:10<00:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2885 ||:  95%|#########4| 104/110 [01:11<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2895 ||:  95%|#########5| 105/110 [01:11<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2894 ||:  96%|#########6| 106/110 [01:12<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2896 ||:  97%|#########7| 107/110 [01:13<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2901 ||:  98%|#########8| 108/110 [01:13<00:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2902 ||:  99%|#########9| 109/110 [01:14<00:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2914 ||: 100%|##########| 110/110 [01:14<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2914 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6709, acc: 0.7812, no_result: 0.0000, loss: 0.7110 ||:   4%|4         | 1/24 [00:00<00:08,  2.79it/s]
+BLEU: 0.6319, acc: 0.7656, no_result: 0.0312, loss: 0.6631 ||:   8%|8         | 2/24 [00:00<00:07,  2.84it/s]
+BLEU: 0.6070, acc: 0.6875, no_result: 0.0625, loss: 0.7502 ||:  12%|#2        | 3/24 [00:01<00:07,  2.89it/s]
+BLEU: 0.6404, acc: 0.6797, no_result: 0.0625, loss: 0.7272 ||:  17%|#6        | 4/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.6740, acc: 0.6937, no_result: 0.0688, loss: 0.7255 ||:  21%|##        | 5/24 [00:01<00:06,  2.76it/s]
+BLEU: 0.6785, acc: 0.6562, no_result: 0.0677, loss: 0.7334 ||:  25%|##5       | 6/24 [00:02<00:06,  2.88it/s]
+BLEU: 0.6797, acc: 0.6652, no_result: 0.0714, loss: 0.7212 ||:  29%|##9       | 7/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6773, acc: 0.6641, no_result: 0.0781, loss: 0.7351 ||:  33%|###3      | 8/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6661, acc: 0.6736, no_result: 0.0799, loss: 0.7315 ||:  38%|###7      | 9/24 [00:03<00:05,  2.95it/s]
+BLEU: 0.6720, acc: 0.6625, no_result: 0.0781, loss: 0.7441 ||:  42%|####1     | 10/24 [00:03<00:04,  2.88it/s]
+BLEU: 0.6720, acc: 0.6477, no_result: 0.0795, loss: 0.7718 ||:  46%|####5     | 11/24 [00:03<00:04,  2.75it/s]
+BLEU: 0.6684, acc: 0.6484, no_result: 0.0755, loss: 0.7723 ||:  50%|#####     | 12/24 [00:04<00:04,  2.84it/s]
+BLEU: 0.6605, acc: 0.6394, no_result: 0.0865, loss: 0.7924 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.6617, acc: 0.6272, no_result: 0.0938, loss: 0.8172 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.64it/s]
+BLEU: 0.6464, acc: 0.6062, no_result: 0.1083, loss: 0.8580 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.6373, acc: 0.5996, no_result: 0.1152, loss: 0.8831 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.6383, acc: 0.5956, no_result: 0.1213, loss: 0.8794 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.6366, acc: 0.6007, no_result: 0.1215, loss: 0.8846 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.48it/s]
+BLEU: 0.6411, acc: 0.6086, no_result: 0.1184, loss: 0.8665 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.57it/s]
+BLEU: 0.6423, acc: 0.6109, no_result: 0.1141, loss: 0.8656 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6384, acc: 0.6042, no_result: 0.1116, loss: 0.8675 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6419, acc: 0.5994, no_result: 0.1122, loss: 0.8651 ||:  92%|#########1| 22/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.6481, acc: 0.6087, no_result: 0.1087, loss: 0.8491 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.6481, acc: 0.6019, no_result: 0.1088, loss: 0.8601 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.6481, acc: 0.6019, no_result: 0.1088, loss: 0.8601 ||: 100%|##########| 24/24 [00:08<00:00,  2.79it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2609 ||:   1%|          | 1/110 [00:00<01:27,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2708 ||:   2%|1         | 2/110 [00:01<01:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2660 ||:   3%|2         | 3/110 [00:02<01:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2507 ||:   4%|3         | 4/110 [00:02<01:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2401 ||:   5%|4         | 5/110 [00:03<01:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2549 ||:   5%|5         | 6/110 [00:04<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2570 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2587 ||:   7%|7         | 8/110 [00:05<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2632 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2692 ||:   9%|9         | 10/110 [00:07<01:33,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2686 ||:  10%|#         | 11/110 [00:08<01:21,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2602 ||:  11%|#         | 12/110 [00:08<01:17,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2560 ||:  12%|#1        | 13/110 [00:09<01:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2508 ||:  13%|#2        | 14/110 [00:10<01:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2514 ||:  14%|#3        | 15/110 [00:11<01:11,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2563 ||:  15%|#4        | 16/110 [00:11<01:11,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2557 ||:  15%|#5        | 17/110 [00:12<01:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2534 ||:  16%|#6        | 18/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2545 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2505 ||:  18%|#8        | 20/110 [00:14<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2509 ||:  19%|#9        | 21/110 [00:14<00:57,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2540 ||:  20%|##        | 22/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2526 ||:  21%|##        | 23/110 [00:16<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  22%|##1       | 24/110 [00:17<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  23%|##2       | 25/110 [00:17<00:58,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2574 ||:  24%|##3       | 26/110 [00:18<00:56,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2559 ||:  25%|##4       | 27/110 [00:19<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2547 ||:  25%|##5       | 28/110 [00:19<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  26%|##6       | 29/110 [00:20<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2540 ||:  27%|##7       | 30/110 [00:21<00:54,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2521 ||:  28%|##8       | 31/110 [00:21<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2532 ||:  29%|##9       | 32/110 [00:22<00:51,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2544 ||:  30%|###       | 33/110 [00:23<00:51,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2531 ||:  31%|###       | 34/110 [00:23<00:48,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2516 ||:  32%|###1      | 35/110 [00:24<00:50,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2523 ||:  33%|###2      | 36/110 [00:25<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2514 ||:  34%|###3      | 37/110 [00:25<00:49,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2532 ||:  35%|###4      | 38/110 [00:26<00:47,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2543 ||:  35%|###5      | 39/110 [00:27<00:46,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2531 ||:  36%|###6      | 40/110 [00:27<00:49,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2540 ||:  37%|###7      | 41/110 [00:28<00:47,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2550 ||:  38%|###8      | 42/110 [00:29<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2560 ||:  39%|###9      | 43/110 [00:29<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2540 ||:  40%|####      | 44/110 [00:30<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2540 ||:  41%|####      | 45/110 [00:31<00:47,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2549 ||:  42%|####1     | 46/110 [00:31<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2552 ||:  43%|####2     | 47/110 [00:32<00:41,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2549 ||:  44%|####3     | 48/110 [00:33<00:43,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2550 ||:  45%|####4     | 49/110 [00:33<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2560 ||:  45%|####5     | 50/110 [00:34<00:43,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2566 ||:  46%|####6     | 51/110 [00:35<00:40,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2569 ||:  47%|####7     | 52/110 [00:36<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2567 ||:  48%|####8     | 53/110 [00:36<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2558 ||:  49%|####9     | 54/110 [00:37<00:37,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2552 ||:  50%|#####     | 55/110 [00:38<00:36,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2559 ||:  51%|#####     | 56/110 [00:38<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2561 ||:  52%|#####1    | 57/110 [00:39<00:34,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2568 ||:  53%|#####2    | 58/110 [00:39<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2557 ||:  54%|#####3    | 59/110 [00:40<00:32,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2564 ||:  55%|#####4    | 60/110 [00:41<00:32,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2563 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2574 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2573 ||:  57%|#####7    | 63/110 [00:43<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2592 ||:  58%|#####8    | 64/110 [00:44<00:30,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2598 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2594 ||:  60%|######    | 66/110 [00:45<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2592 ||:  61%|######    | 67/110 [00:45<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2587 ||:  62%|######1   | 68/110 [00:46<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2583 ||:  63%|######2   | 69/110 [00:47<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2581 ||:  64%|######3   | 70/110 [00:47<00:26,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2586 ||:  65%|######4   | 71/110 [00:48<00:24,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2582 ||:  65%|######5   | 72/110 [00:49<00:24,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2593 ||:  66%|######6   | 73/110 [00:49<00:24,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2600 ||:  67%|######7   | 74/110 [00:50<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2608 ||:  68%|######8   | 75/110 [00:51<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2621 ||:  69%|######9   | 76/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2621 ||:  70%|#######   | 77/110 [00:52<00:22,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2632 ||:  71%|#######   | 78/110 [00:53<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2641 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2647 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2658 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2657 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2658 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2662 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2661 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2653 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2647 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2649 ||:  80%|########  | 88/110 [01:00<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2647 ||:  81%|########  | 89/110 [01:00<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2655 ||:  82%|########1 | 90/110 [01:01<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2642 ||:  83%|########2 | 91/110 [01:02<00:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2654 ||:  84%|########3 | 92/110 [01:02<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2653 ||:  85%|########4 | 93/110 [01:03<00:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2655 ||:  85%|########5 | 94/110 [01:04<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2660 ||:  86%|########6 | 95/110 [01:04<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2666 ||:  87%|########7 | 96/110 [01:05<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2664 ||:  88%|########8 | 97/110 [01:05<00:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2667 ||:  89%|########9 | 98/110 [01:06<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2676 ||:  90%|######### | 99/110 [01:07<00:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2675 ||:  91%|######### | 100/110 [01:07<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2674 ||:  92%|#########1| 101/110 [01:08<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2676 ||:  93%|#########2| 102/110 [01:09<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2671 ||:  94%|#########3| 103/110 [01:09<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2675 ||:  95%|#########4| 104/110 [01:10<00:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2674 ||:  95%|#########5| 105/110 [01:11<00:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2676 ||:  96%|#########6| 106/110 [01:11<00:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2674 ||:  97%|#########7| 107/110 [01:12<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2678 ||:  98%|#########8| 108/110 [01:13<00:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2685 ||:  99%|#########9| 109/110 [01:13<00:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2686 ||: 100%|##########| 110/110 [01:14<00:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2686 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6572, acc: 0.6875, no_result: 0.0938, loss: 0.6186 ||:   4%|4         | 1/24 [00:00<00:08,  2.82it/s]
+BLEU: 0.6138, acc: 0.6875, no_result: 0.0781, loss: 0.6244 ||:   8%|8         | 2/24 [00:00<00:07,  2.90it/s]
+BLEU: 0.5846, acc: 0.6250, no_result: 0.1354, loss: 0.7525 ||:  12%|#2        | 3/24 [00:01<00:07,  2.89it/s]
+BLEU: 0.6299, acc: 0.6328, no_result: 0.1250, loss: 0.7396 ||:  17%|#6        | 4/24 [00:01<00:07,  2.80it/s]
+BLEU: 0.6588, acc: 0.6625, no_result: 0.1125, loss: 0.7149 ||:  21%|##        | 5/24 [00:01<00:06,  2.77it/s]
+BLEU: 0.6703, acc: 0.6562, no_result: 0.1042, loss: 0.7331 ||:  25%|##5       | 6/24 [00:02<00:06,  2.89it/s]
+BLEU: 0.6736, acc: 0.6607, no_result: 0.0982, loss: 0.7309 ||:  29%|##9       | 7/24 [00:02<00:05,  3.00it/s]
+BLEU: 0.6733, acc: 0.6680, no_result: 0.0977, loss: 0.7373 ||:  33%|###3      | 8/24 [00:02<00:05,  3.03it/s]
+BLEU: 0.6638, acc: 0.6632, no_result: 0.1007, loss: 0.7347 ||:  38%|###7      | 9/24 [00:03<00:05,  2.98it/s]
+BLEU: 0.6759, acc: 0.6656, no_result: 0.0938, loss: 0.7475 ||:  42%|####1     | 10/24 [00:03<00:04,  2.91it/s]
+BLEU: 0.6727, acc: 0.6506, no_result: 0.0909, loss: 0.7796 ||:  46%|####5     | 11/24 [00:03<00:04,  2.79it/s]
+BLEU: 0.6695, acc: 0.6484, no_result: 0.0885, loss: 0.7747 ||:  50%|#####     | 12/24 [00:04<00:04,  2.88it/s]
+BLEU: 0.6588, acc: 0.6442, no_result: 0.0938, loss: 0.7976 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.76it/s]
+BLEU: 0.6636, acc: 0.6272, no_result: 0.0982, loss: 0.8193 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.68it/s]
+BLEU: 0.6425, acc: 0.6188, no_result: 0.1104, loss: 0.8627 ||:  62%|######2   | 15/24 [00:05<00:03,  2.50it/s]
+BLEU: 0.6350, acc: 0.6113, no_result: 0.1211, loss: 0.8798 ||:  67%|######6   | 16/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.6330, acc: 0.6066, no_result: 0.1268, loss: 0.8758 ||:  71%|#######   | 17/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6334, acc: 0.6059, no_result: 0.1267, loss: 0.8836 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6354, acc: 0.6102, no_result: 0.1217, loss: 0.8662 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.59it/s]
+BLEU: 0.6363, acc: 0.6156, no_result: 0.1187, loss: 0.8598 ||:  83%|########3 | 20/24 [00:07<00:01,  2.72it/s]
+BLEU: 0.6291, acc: 0.6086, no_result: 0.1190, loss: 0.8623 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6343, acc: 0.6080, no_result: 0.1179, loss: 0.8588 ||:  92%|#########1| 22/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.6371, acc: 0.6155, no_result: 0.1141, loss: 0.8450 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.6363, acc: 0.6130, no_result: 0.1140, loss: 0.8516 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.6363, acc: 0.6130, no_result: 0.1140, loss: 0.8516 ||: 100%|##########| 24/24 [00:08<00:00,  2.81it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2423 ||:   1%|          | 1/110 [00:00<01:35,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2354 ||:   2%|1         | 2/110 [00:01<01:17,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2238 ||:   3%|2         | 3/110 [00:02<01:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2195 ||:   4%|3         | 4/110 [00:03<01:21,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2233 ||:   5%|4         | 5/110 [00:03<01:18,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2185 ||:   5%|5         | 6/110 [00:04<01:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2267 ||:   6%|6         | 7/110 [00:05<01:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2291 ||:   7%|7         | 8/110 [00:05<01:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2286 ||:   8%|8         | 9/110 [00:06<01:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2375 ||:   9%|9         | 10/110 [00:07<01:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2390 ||:  10%|#         | 11/110 [00:07<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2400 ||:  11%|#         | 12/110 [00:08<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2360 ||:  12%|#1        | 13/110 [00:09<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2329 ||:  13%|#2        | 14/110 [00:09<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2328 ||:  14%|#3        | 15/110 [00:10<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2359 ||:  15%|#4        | 16/110 [00:11<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2366 ||:  15%|#5        | 17/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2377 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2395 ||:  17%|#7        | 19/110 [00:13<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2397 ||:  18%|#8        | 20/110 [00:13<00:57,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2429 ||:  19%|#9        | 21/110 [00:14<00:56,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2430 ||:  20%|##        | 22/110 [00:14<00:54,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2416 ||:  21%|##        | 23/110 [00:15<00:53,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2415 ||:  22%|##1       | 24/110 [00:16<00:53,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2436 ||:  23%|##2       | 25/110 [00:16<00:52,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2429 ||:  24%|##3       | 26/110 [00:17<00:51,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2428 ||:  25%|##4       | 27/110 [00:18<00:51,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2445 ||:  25%|##5       | 28/110 [00:18<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2423 ||:  26%|##6       | 29/110 [00:19<00:53,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2430 ||:  27%|##7       | 30/110 [00:20<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2448 ||:  28%|##8       | 31/110 [00:20<00:50,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2435 ||:  29%|##9       | 32/110 [00:21<00:48,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2440 ||:  30%|###       | 33/110 [00:21<00:49,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2443 ||:  31%|###       | 34/110 [00:22<00:48,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2425 ||:  32%|###1      | 35/110 [00:23<00:47,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2440 ||:  33%|###2      | 36/110 [00:23<00:48,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2431 ||:  34%|###3      | 37/110 [00:24<00:53,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2447 ||:  35%|###4      | 38/110 [00:25<00:53,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2459 ||:  35%|###5      | 39/110 [00:26<00:49,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2450 ||:  36%|###6      | 40/110 [00:26<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2444 ||:  37%|###7      | 41/110 [00:27<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2430 ||:  38%|###8      | 42/110 [00:28<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2448 ||:  39%|###9      | 43/110 [00:28<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2447 ||:  40%|####      | 44/110 [00:29<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2444 ||:  41%|####      | 45/110 [00:30<00:42,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2451 ||:  42%|####1     | 46/110 [00:30<00:41,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2444 ||:  43%|####2     | 47/110 [00:31<00:41,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2441 ||:  44%|####3     | 48/110 [00:32<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2445 ||:  45%|####4     | 49/110 [00:32<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2455 ||:  45%|####5     | 50/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2462 ||:  46%|####6     | 51/110 [00:34<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2465 ||:  47%|####7     | 52/110 [00:34<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2461 ||:  48%|####8     | 53/110 [00:35<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2476 ||:  49%|####9     | 54/110 [00:36<00:37,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2475 ||:  50%|#####     | 55/110 [00:36<00:35,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2482 ||:  51%|#####     | 56/110 [00:37<00:34,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2487 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2484 ||:  53%|#####2    | 58/110 [00:38<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2498 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2530 ||:  55%|#####4    | 60/110 [00:40<00:35,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2520 ||:  55%|#####5    | 61/110 [00:40<00:34,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2522 ||:  56%|#####6    | 62/110 [00:41<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2526 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2524 ||:  58%|#####8    | 64/110 [00:42<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2519 ||:  59%|#####9    | 65/110 [00:43<00:28,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2522 ||:  60%|######    | 66/110 [00:44<00:27,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2528 ||:  61%|######    | 67/110 [00:44<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2541 ||:  62%|######1   | 68/110 [00:45<00:27,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2541 ||:  63%|######2   | 69/110 [00:46<00:26,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2538 ||:  64%|######3   | 70/110 [00:46<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2548 ||:  65%|######4   | 71/110 [00:47<00:24,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2546 ||:  65%|######5   | 72/110 [00:47<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2551 ||:  66%|######6   | 73/110 [00:48<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2550 ||:  67%|######7   | 74/110 [00:49<00:25,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2548 ||:  68%|######8   | 75/110 [00:50<00:25,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2540 ||:  69%|######9   | 76/110 [00:50<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2532 ||:  70%|#######   | 77/110 [00:51<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2537 ||:  71%|#######   | 78/110 [00:52<00:21,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  72%|#######1  | 79/110 [00:52<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2528 ||:  73%|#######2  | 80/110 [00:53<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2524 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2527 ||:  75%|#######4  | 82/110 [00:54<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2527 ||:  75%|#######5  | 83/110 [00:55<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2529 ||:  76%|#######6  | 84/110 [00:56<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2533 ||:  77%|#######7  | 85/110 [00:56<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  78%|#######8  | 86/110 [00:57<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2535 ||:  80%|########  | 88/110 [00:58<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2536 ||:  81%|########  | 89/110 [00:59<00:13,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2537 ||:  82%|########1 | 90/110 [01:00<00:12,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2532 ||:  83%|########2 | 91/110 [01:00<00:12,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2539 ||:  84%|########3 | 92/110 [01:01<00:11,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2531 ||:  85%|########4 | 93/110 [01:02<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2524 ||:  85%|########5 | 94/110 [01:02<00:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2533 ||:  86%|########6 | 95/110 [01:03<00:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2534 ||:  87%|########7 | 96/110 [01:04<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2533 ||:  88%|########8 | 97/110 [01:04<00:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2541 ||:  89%|########9 | 98/110 [01:05<00:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2544 ||:  90%|######### | 99/110 [01:06<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2544 ||:  91%|######### | 100/110 [01:07<00:09,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2544 ||:  92%|#########1| 101/110 [01:08<00:07,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2541 ||:  93%|#########2| 102/110 [01:09<00:06,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2544 ||:  94%|#########3| 103/110 [01:09<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2547 ||:  95%|#########4| 104/110 [01:10<00:04,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2549 ||:  95%|#########5| 105/110 [01:11<00:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2549 ||:  96%|#########6| 106/110 [01:11<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2544 ||:  97%|#########7| 107/110 [01:12<00:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2548 ||:  98%|#########8| 108/110 [01:12<00:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2547 ||:  99%|#########9| 109/110 [01:13<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2543 ||: 100%|##########| 110/110 [01:14<00:00,  1.77it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2543 ||: 100%|##########| 110/110 [01:14<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6635, acc: 0.7188, no_result: 0.0625, loss: 0.6904 ||:   4%|4         | 1/24 [00:00<00:08,  2.82it/s]
+BLEU: 0.5535, acc: 0.7188, no_result: 0.0625, loss: 0.7244 ||:   8%|8         | 2/24 [00:00<00:07,  2.89it/s]
+BLEU: 0.5720, acc: 0.6667, no_result: 0.1042, loss: 0.7905 ||:  12%|#2        | 3/24 [00:01<00:07,  2.90it/s]
+BLEU: 0.6201, acc: 0.6719, no_result: 0.0938, loss: 0.7531 ||:  17%|#6        | 4/24 [00:01<00:07,  2.80it/s]
+BLEU: 0.6594, acc: 0.6813, no_result: 0.1000, loss: 0.7345 ||:  21%|##        | 5/24 [00:01<00:06,  2.73it/s]
+BLEU: 0.6557, acc: 0.6510, no_result: 0.0938, loss: 0.7448 ||:  25%|##5       | 6/24 [00:02<00:06,  2.85it/s]
+BLEU: 0.6653, acc: 0.6607, no_result: 0.0938, loss: 0.7451 ||:  29%|##9       | 7/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.6665, acc: 0.6602, no_result: 0.0938, loss: 0.7562 ||:  33%|###3      | 8/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.6585, acc: 0.6667, no_result: 0.0903, loss: 0.7544 ||:  38%|###7      | 9/24 [00:03<00:05,  2.95it/s]
+BLEU: 0.6684, acc: 0.6656, no_result: 0.0813, loss: 0.7642 ||:  42%|####1     | 10/24 [00:03<00:04,  2.89it/s]
+BLEU: 0.6723, acc: 0.6534, no_result: 0.0824, loss: 0.7960 ||:  46%|####5     | 11/24 [00:03<00:04,  2.76it/s]
+BLEU: 0.6690, acc: 0.6510, no_result: 0.0755, loss: 0.8018 ||:  50%|#####     | 12/24 [00:04<00:04,  2.87it/s]
+BLEU: 0.6567, acc: 0.6490, no_result: 0.0769, loss: 0.8240 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.77it/s]
+BLEU: 0.6590, acc: 0.6295, no_result: 0.0826, loss: 0.8440 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.68it/s]
+BLEU: 0.6471, acc: 0.6188, no_result: 0.1000, loss: 0.8914 ||:  62%|######2   | 15/24 [00:05<00:03,  2.48it/s]
+BLEU: 0.6437, acc: 0.6055, no_result: 0.1074, loss: 0.9116 ||:  67%|######6   | 16/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.6404, acc: 0.6029, no_result: 0.1103, loss: 0.9119 ||:  71%|#######   | 17/24 [00:06<00:02,  2.50it/s]
+BLEU: 0.6378, acc: 0.5990, no_result: 0.1111, loss: 0.9191 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6401, acc: 0.6053, no_result: 0.1086, loss: 0.8996 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.59it/s]
+BLEU: 0.6395, acc: 0.6078, no_result: 0.1062, loss: 0.8939 ||:  83%|########3 | 20/24 [00:07<00:01,  2.73it/s]
+BLEU: 0.6354, acc: 0.6012, no_result: 0.1042, loss: 0.8986 ||:  88%|########7 | 21/24 [00:07<00:01,  2.71it/s]
+BLEU: 0.6388, acc: 0.5938, no_result: 0.1037, loss: 0.8957 ||:  92%|#########1| 22/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.6414, acc: 0.6019, no_result: 0.0992, loss: 0.8813 ||:  96%|#########5| 23/24 [00:08<00:00,  2.77it/s]
+BLEU: 0.6411, acc: 0.5907, no_result: 0.1043, loss: 0.8977 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.6411, acc: 0.5907, no_result: 0.1043, loss: 0.8977 ||: 100%|##########| 24/24 [00:08<00:00,  2.80it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2486 ||:   1%|          | 1/110 [00:00<01:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2494 ||:   2%|1         | 2/110 [00:01<01:25,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2561 ||:   3%|2         | 3/110 [00:02<01:23,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2420 ||:   4%|3         | 4/110 [00:02<01:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2350 ||:   5%|4         | 5/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2292 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2276 ||:   6%|6         | 7/110 [00:04<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2270 ||:   7%|7         | 8/110 [00:05<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2261 ||:   8%|8         | 9/110 [00:06<01:11,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2299 ||:   9%|9         | 10/110 [00:07<01:14,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2261 ||:  10%|#         | 11/110 [00:07<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2245 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2236 ||:  12%|#1        | 13/110 [00:09<01:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2217 ||:  13%|#2        | 14/110 [00:09<01:02,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2204 ||:  14%|#3        | 15/110 [00:10<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2169 ||:  15%|#4        | 16/110 [00:10<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2153 ||:  15%|#5        | 17/110 [00:11<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2150 ||:  16%|#6        | 18/110 [00:12<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2125 ||:  17%|#7        | 19/110 [00:12<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2155 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2154 ||:  19%|#9        | 21/110 [00:14<00:59,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2167 ||:  20%|##        | 22/110 [00:14<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2181 ||:  21%|##        | 23/110 [00:15<01:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2161 ||:  22%|##1       | 24/110 [00:16<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2190 ||:  23%|##2       | 25/110 [00:17<01:03,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2196 ||:  24%|##3       | 26/110 [00:18<01:00,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2192 ||:  25%|##4       | 27/110 [00:18<00:58,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2195 ||:  25%|##5       | 28/110 [00:19<00:56,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2187 ||:  26%|##6       | 29/110 [00:20<00:55,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2194 ||:  27%|##7       | 30/110 [00:20<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2180 ||:  28%|##8       | 31/110 [00:21<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2175 ||:  29%|##9       | 32/110 [00:21<00:52,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2153 ||:  30%|###       | 33/110 [00:22<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2156 ||:  31%|###       | 34/110 [00:23<00:49,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2159 ||:  32%|###1      | 35/110 [00:23<00:47,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2165 ||:  33%|###2      | 36/110 [00:24<00:46,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2171 ||:  34%|###3      | 37/110 [00:25<00:45,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2167 ||:  35%|###4      | 38/110 [00:25<00:44,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2188 ||:  35%|###5      | 39/110 [00:26<00:43,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2198 ||:  36%|###6      | 40/110 [00:27<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2196 ||:  37%|###7      | 41/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2203 ||:  38%|###8      | 42/110 [00:28<00:42,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2210 ||:  39%|###9      | 43/110 [00:28<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2216 ||:  40%|####      | 44/110 [00:29<00:46,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2224 ||:  41%|####      | 45/110 [00:30<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2215 ||:  42%|####1     | 46/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2222 ||:  43%|####2     | 47/110 [00:31<00:41,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2241 ||:  44%|####3     | 48/110 [00:32<00:40,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2251 ||:  45%|####4     | 49/110 [00:32<00:39,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2264 ||:  45%|####5     | 50/110 [00:33<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2266 ||:  46%|####6     | 51/110 [00:34<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2269 ||:  47%|####7     | 52/110 [00:35<00:42,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2267 ||:  48%|####8     | 53/110 [00:35<00:40,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2260 ||:  49%|####9     | 54/110 [00:36<00:38,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2261 ||:  50%|#####     | 55/110 [00:37<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2262 ||:  51%|#####     | 56/110 [00:37<00:36,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2270 ||:  52%|#####1    | 57/110 [00:38<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2267 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2273 ||:  54%|#####3    | 59/110 [00:39<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2273 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2283 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2291 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2287 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2311 ||:  58%|#####8    | 64/110 [00:43<00:29,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2325 ||:  59%|#####9    | 65/110 [00:43<00:28,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2339 ||:  60%|######    | 66/110 [00:44<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2338 ||:  61%|######    | 67/110 [00:45<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2338 ||:  62%|######1   | 68/110 [00:45<00:27,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2327 ||:  63%|######2   | 69/110 [00:46<00:25,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2333 ||:  64%|######3   | 70/110 [00:47<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2332 ||:  65%|######4   | 71/110 [00:47<00:25,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2335 ||:  65%|######5   | 72/110 [00:48<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2338 ||:  66%|######6   | 73/110 [00:48<00:23,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2344 ||:  67%|######7   | 74/110 [00:49<00:23,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2348 ||:  68%|######8   | 75/110 [00:50<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2340 ||:  69%|######9   | 76/110 [00:51<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2346 ||:  70%|#######   | 77/110 [00:51<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2351 ||:  71%|#######   | 78/110 [00:52<00:20,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2364 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2371 ||:  73%|#######2  | 80/110 [00:53<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2374 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2379 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2381 ||:  75%|#######5  | 83/110 [00:55<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2382 ||:  76%|#######6  | 84/110 [00:56<00:18,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2386 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2384 ||:  78%|#######8  | 86/110 [00:57<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2374 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2378 ||:  80%|########  | 88/110 [00:59<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2379 ||:  81%|########  | 89/110 [00:59<00:13,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2374 ||:  82%|########1 | 90/110 [01:01<00:18,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2382 ||:  83%|########2 | 91/110 [01:02<00:16,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2384 ||:  84%|########3 | 92/110 [01:02<00:14,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2390 ||:  85%|########4 | 93/110 [01:03<00:12,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2391 ||:  85%|########5 | 94/110 [01:03<00:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2395 ||:  86%|########6 | 95/110 [01:04<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2400 ||:  87%|########7 | 96/110 [01:05<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2404 ||:  88%|########8 | 97/110 [01:05<00:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2412 ||:  89%|########9 | 98/110 [01:06<00:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2411 ||:  90%|######### | 99/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2410 ||:  91%|######### | 100/110 [01:07<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2418 ||:  92%|#########1| 101/110 [01:08<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2424 ||:  93%|#########2| 102/110 [01:09<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2426 ||:  94%|#########3| 103/110 [01:09<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2420 ||:  95%|#########4| 104/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2423 ||:  95%|#########5| 105/110 [01:11<00:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2424 ||:  96%|#########6| 106/110 [01:11<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2426 ||:  97%|#########7| 107/110 [01:12<00:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2438 ||:  98%|#########8| 108/110 [01:13<00:01,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2441 ||:  99%|#########9| 109/110 [01:13<00:00,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2436 ||: 100%|##########| 110/110 [01:14<00:00,  1.86it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2436 ||: 100%|##########| 110/110 [01:14<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6367, acc: 0.6562, no_result: 0.0938, loss: 0.6822 ||:   4%|4         | 1/24 [00:00<00:08,  2.83it/s]
+BLEU: 0.5729, acc: 0.6875, no_result: 0.0781, loss: 0.6587 ||:   8%|8         | 2/24 [00:00<00:07,  2.88it/s]
+BLEU: 0.5866, acc: 0.6354, no_result: 0.1042, loss: 0.7603 ||:  12%|#2        | 3/24 [00:01<00:07,  2.91it/s]
+BLEU: 0.6284, acc: 0.6484, no_result: 0.1016, loss: 0.7370 ||:  17%|#6        | 4/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.6641, acc: 0.6813, no_result: 0.0875, loss: 0.7081 ||:  21%|##        | 5/24 [00:01<00:06,  2.81it/s]
+BLEU: 0.6699, acc: 0.6562, no_result: 0.0833, loss: 0.7350 ||:  25%|##5       | 6/24 [00:02<00:06,  2.91it/s]
+BLEU: 0.6709, acc: 0.6607, no_result: 0.0848, loss: 0.7358 ||:  29%|##9       | 7/24 [00:02<00:05,  3.03it/s]
+BLEU: 0.6719, acc: 0.6641, no_result: 0.0898, loss: 0.7385 ||:  33%|###3      | 8/24 [00:02<00:05,  3.04it/s]
+BLEU: 0.6642, acc: 0.6701, no_result: 0.0868, loss: 0.7363 ||:  38%|###7      | 9/24 [00:03<00:04,  3.01it/s]
+BLEU: 0.6797, acc: 0.6687, no_result: 0.0781, loss: 0.7472 ||:  42%|####1     | 10/24 [00:03<00:04,  2.94it/s]
+BLEU: 0.6789, acc: 0.6477, no_result: 0.0852, loss: 0.7769 ||:  46%|####5     | 11/24 [00:03<00:04,  2.80it/s]
+BLEU: 0.6738, acc: 0.6510, no_result: 0.0833, loss: 0.7808 ||:  50%|#####     | 12/24 [00:04<00:04,  2.89it/s]
+BLEU: 0.6617, acc: 0.6418, no_result: 0.0962, loss: 0.8051 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.78it/s]
+BLEU: 0.6638, acc: 0.6250, no_result: 0.1004, loss: 0.8245 ||:  58%|#####8    | 14/24 [00:04<00:03,  2.67it/s]
+BLEU: 0.6493, acc: 0.6125, no_result: 0.1146, loss: 0.8703 ||:  62%|######2   | 15/24 [00:05<00:03,  2.49it/s]
+BLEU: 0.6458, acc: 0.6055, no_result: 0.1230, loss: 0.8855 ||:  67%|######6   | 16/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.6465, acc: 0.5993, no_result: 0.1305, loss: 0.8836 ||:  71%|#######   | 17/24 [00:06<00:02,  2.50it/s]
+BLEU: 0.6457, acc: 0.6007, no_result: 0.1337, loss: 0.8875 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6505, acc: 0.6086, no_result: 0.1299, loss: 0.8670 ||:  79%|#######9  | 19/24 [00:06<00:01,  2.59it/s]
+BLEU: 0.6498, acc: 0.6125, no_result: 0.1266, loss: 0.8605 ||:  83%|########3 | 20/24 [00:07<00:01,  2.72it/s]
+BLEU: 0.6444, acc: 0.6027, no_result: 0.1265, loss: 0.8613 ||:  88%|########7 | 21/24 [00:07<00:01,  2.70it/s]
+BLEU: 0.6480, acc: 0.5994, no_result: 0.1236, loss: 0.8571 ||:  92%|#########1| 22/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.6514, acc: 0.6060, no_result: 0.1196, loss: 0.8423 ||:  96%|#########5| 23/24 [00:08<00:00,  2.77it/s]
+BLEU: 0.6502, acc: 0.6039, no_result: 0.1192, loss: 0.8546 ||: 100%|##########| 24/24 [00:08<00:00,  3.42it/s]
+BLEU: 0.6502, acc: 0.6039, no_result: 0.1192, loss: 0.8546 ||: 100%|##########| 24/24 [00:08<00:00,  2.82it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.3083 ||:   1%|          | 1/110 [00:00<01:06,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2484 ||:   2%|1         | 2/110 [00:01<01:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2367 ||:   3%|2         | 3/110 [00:01<01:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2446 ||:   4%|3         | 4/110 [00:02<01:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2397 ||:   5%|4         | 5/110 [00:03<01:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2333 ||:   5%|5         | 6/110 [00:04<01:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2321 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2339 ||:   7%|7         | 8/110 [00:05<01:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2229 ||:   8%|8         | 9/110 [00:06<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2260 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2249 ||:  10%|#         | 11/110 [00:07<01:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2273 ||:  11%|#         | 12/110 [00:08<01:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2296 ||:  12%|#1        | 13/110 [00:09<01:11,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2309 ||:  13%|#2        | 14/110 [00:09<01:12,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2296 ||:  14%|#3        | 15/110 [00:10<01:12,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2294 ||:  15%|#4        | 16/110 [00:11<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2297 ||:  15%|#5        | 17/110 [00:11<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2311 ||:  16%|#6        | 18/110 [00:12<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2278 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2273 ||:  18%|#8        | 20/110 [00:14<01:07,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2304 ||:  19%|#9        | 21/110 [00:14<01:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2287 ||:  20%|##        | 22/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2287 ||:  21%|##        | 23/110 [00:16<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2297 ||:  22%|##1       | 24/110 [00:16<01:00,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2284 ||:  23%|##2       | 25/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2308 ||:  24%|##3       | 26/110 [00:18<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2354 ||:  25%|##4       | 27/110 [00:18<00:59,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2353 ||:  25%|##5       | 28/110 [00:19<00:58,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2341 ||:  26%|##6       | 29/110 [00:20<00:59,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2323 ||:  27%|##7       | 30/110 [00:20<00:55,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2321 ||:  28%|##8       | 31/110 [00:21<00:55,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2332 ||:  29%|##9       | 32/110 [00:22<00:52,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2338 ||:  30%|###       | 33/110 [00:22<00:50,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2345 ||:  31%|###       | 34/110 [00:23<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2361 ||:  32%|###1      | 35/110 [00:24<00:50,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2368 ||:  33%|###2      | 36/110 [00:24<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2371 ||:  34%|###3      | 37/110 [00:25<00:50,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2378 ||:  35%|###4      | 38/110 [00:26<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2371 ||:  35%|###5      | 39/110 [00:27<00:48,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2384 ||:  36%|###6      | 40/110 [00:27<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2384 ||:  37%|###7      | 41/110 [00:28<00:46,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2373 ||:  38%|###8      | 42/110 [00:29<00:46,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2369 ||:  39%|###9      | 43/110 [00:29<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2380 ||:  40%|####      | 44/110 [00:30<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2381 ||:  41%|####      | 45/110 [00:31<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2375 ||:  42%|####1     | 46/110 [00:31<00:44,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2389 ||:  43%|####2     | 47/110 [00:32<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2385 ||:  44%|####3     | 48/110 [00:33<00:42,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2389 ||:  45%|####4     | 49/110 [00:33<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2384 ||:  45%|####5     | 50/110 [00:34<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2370 ||:  46%|####6     | 51/110 [00:35<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2361 ||:  47%|####7     | 52/110 [00:36<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2360 ||:  48%|####8     | 53/110 [00:36<00:40,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2357 ||:  49%|####9     | 54/110 [00:37<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2364 ||:  50%|#####     | 55/110 [00:38<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2361 ||:  51%|#####     | 56/110 [00:38<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2355 ||:  52%|#####1    | 57/110 [00:39<00:37,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2352 ||:  53%|#####2    | 58/110 [00:40<00:36,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2353 ||:  54%|#####3    | 59/110 [00:40<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2364 ||:  55%|#####4    | 60/110 [00:41<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2372 ||:  55%|#####5    | 61/110 [00:42<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2372 ||:  56%|#####6    | 62/110 [00:43<00:35,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2373 ||:  57%|#####7    | 63/110 [00:44<00:36,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2364 ||:  58%|#####8    | 64/110 [00:44<00:34,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2371 ||:  59%|#####9    | 65/110 [00:45<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2376 ||:  60%|######    | 66/110 [00:46<00:31,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2368 ||:  61%|######    | 67/110 [00:46<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2372 ||:  62%|######1   | 68/110 [00:47<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2365 ||:  63%|######2   | 69/110 [00:48<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2363 ||:  64%|######3   | 70/110 [00:49<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2364 ||:  65%|######4   | 71/110 [00:49<00:28,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2366 ||:  65%|######5   | 72/110 [00:50<00:27,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2367 ||:  66%|######6   | 73/110 [00:51<00:27,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2364 ||:  67%|######7   | 74/110 [00:52<00:26,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2377 ||:  68%|######8   | 75/110 [00:52<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2380 ||:  69%|######9   | 76/110 [00:53<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2379 ||:  70%|#######   | 77/110 [00:54<00:23,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2375 ||:  71%|#######   | 78/110 [00:54<00:23,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2383 ||:  72%|#######1  | 79/110 [00:55<00:23,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2387 ||:  73%|#######2  | 80/110 [00:57<00:29,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2391 ||:  74%|#######3  | 81/110 [00:57<00:26,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2391 ||:  75%|#######4  | 82/110 [00:58<00:23,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2388 ||:  75%|#######5  | 83/110 [00:59<00:22,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2387 ||:  76%|#######6  | 84/110 [01:00<00:20,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2380 ||:  77%|#######7  | 85/110 [01:00<00:19,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2373 ||:  78%|#######8  | 86/110 [01:01<00:17,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2379 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2380 ||:  80%|########  | 88/110 [01:02<00:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2391 ||:  81%|########  | 89/110 [01:03<00:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2407 ||:  82%|########1 | 90/110 [01:04<00:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2405 ||:  83%|########2 | 91/110 [01:04<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2404 ||:  84%|########3 | 92/110 [01:05<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2404 ||:  85%|########4 | 93/110 [01:06<00:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2400 ||:  85%|########5 | 94/110 [01:06<00:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2402 ||:  86%|########6 | 95/110 [01:07<00:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2405 ||:  87%|########7 | 96/110 [01:08<00:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2405 ||:  88%|########8 | 97/110 [01:09<00:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2406 ||:  89%|########9 | 98/110 [01:10<00:09,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2410 ||:  90%|######### | 99/110 [01:10<00:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2411 ||:  91%|######### | 100/110 [01:11<00:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2409 ||:  92%|#########1| 101/110 [01:12<00:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2403 ||:  93%|#########2| 102/110 [01:12<00:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2408 ||:  94%|#########3| 103/110 [01:13<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2408 ||:  95%|#########4| 104/110 [01:14<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2412 ||:  95%|#########5| 105/110 [01:14<00:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2406 ||:  96%|#########6| 106/110 [01:15<00:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2409 ||:  97%|#########7| 107/110 [01:16<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2400 ||:  98%|#########8| 108/110 [01:16<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2398 ||:  99%|#########9| 109/110 [01:17<00:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2408 ||: 100%|##########| 110/110 [01:18<00:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2408 ||: 100%|##########| 110/110 [01:18<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6276, acc: 0.7188, no_result: 0.0312, loss: 0.7206 ||:   4%|4         | 1/24 [00:00<00:08,  2.75it/s]
+BLEU: 0.5712, acc: 0.7188, no_result: 0.0469, loss: 0.7129 ||:   8%|8         | 2/24 [00:00<00:07,  2.82it/s]
+BLEU: 0.5636, acc: 0.6875, no_result: 0.0938, loss: 0.7902 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.6140, acc: 0.6797, no_result: 0.0938, loss: 0.7571 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.6522, acc: 0.7000, no_result: 0.0938, loss: 0.7407 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.6531, acc: 0.6771, no_result: 0.0990, loss: 0.7585 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.6500, acc: 0.6786, no_result: 0.0893, loss: 0.7500 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.6362, acc: 0.6836, no_result: 0.0859, loss: 0.7734 ||:  33%|###3      | 8/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.6327, acc: 0.6701, no_result: 0.0903, loss: 0.7723 ||:  38%|###7      | 9/24 [00:03<00:05,  2.72it/s]
+BLEU: 0.6403, acc: 0.6594, no_result: 0.0906, loss: 0.7815 ||:  42%|####1     | 10/24 [00:03<00:05,  2.60it/s]
+BLEU: 0.6370, acc: 0.6449, no_result: 0.0909, loss: 0.8117 ||:  46%|####5     | 11/24 [00:04<00:05,  2.46it/s]
+BLEU: 0.6352, acc: 0.6458, no_result: 0.0859, loss: 0.8137 ||:  50%|#####     | 12/24 [00:04<00:04,  2.51it/s]
+BLEU: 0.6245, acc: 0.6346, no_result: 0.0962, loss: 0.8435 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.39it/s]
+BLEU: 0.6289, acc: 0.6228, no_result: 0.1004, loss: 0.8705 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.28it/s]
+BLEU: 0.6176, acc: 0.6000, no_result: 0.1208, loss: 0.9240 ||:  62%|######2   | 15/24 [00:06<00:04,  2.11it/s]
+BLEU: 0.6122, acc: 0.5898, no_result: 0.1289, loss: 0.9489 ||:  67%|######6   | 16/24 [00:06<00:03,  2.04it/s]
+BLEU: 0.6130, acc: 0.5882, no_result: 0.1360, loss: 0.9430 ||:  71%|#######   | 17/24 [00:06<00:03,  2.11it/s]
+BLEU: 0.6137, acc: 0.5868, no_result: 0.1372, loss: 0.9505 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.11it/s]
+BLEU: 0.6158, acc: 0.5938, no_result: 0.1316, loss: 0.9326 ||:  79%|#######9  | 19/24 [00:08<00:02,  1.93it/s]
+BLEU: 0.6139, acc: 0.5984, no_result: 0.1281, loss: 0.9244 ||:  83%|########3 | 20/24 [00:08<00:01,  2.11it/s]
+BLEU: 0.6104, acc: 0.5952, no_result: 0.1250, loss: 0.9259 ||:  88%|########7 | 21/24 [00:08<00:01,  2.17it/s]
+BLEU: 0.6148, acc: 0.5881, no_result: 0.1222, loss: 0.9211 ||:  92%|#########1| 22/24 [00:09<00:00,  2.19it/s]
+BLEU: 0.6215, acc: 0.5965, no_result: 0.1196, loss: 0.9041 ||:  96%|#########5| 23/24 [00:09<00:00,  2.30it/s]
+BLEU: 0.6210, acc: 0.5901, no_result: 0.1192, loss: 0.9148 ||: 100%|##########| 24/24 [00:09<00:00,  2.83it/s]
+BLEU: 0.6210, acc: 0.5901, no_result: 0.1192, loss: 0.9148 ||: 100%|##########| 24/24 [00:09<00:00,  2.43it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2348 ||:   1%|          | 1/110 [00:00<01:19,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2332 ||:   2%|1         | 2/110 [00:01<01:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2574 ||:   3%|2         | 3/110 [00:02<01:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2573 ||:   4%|3         | 4/110 [00:02<01:20,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2390 ||:   5%|4         | 5/110 [00:03<01:18,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2384 ||:   5%|5         | 6/110 [00:04<01:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2397 ||:   6%|6         | 7/110 [00:05<01:16,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2380 ||:   7%|7         | 8/110 [00:05<01:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2322 ||:   8%|8         | 9/110 [00:06<01:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2293 ||:   9%|9         | 10/110 [00:07<01:12,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2248 ||:  10%|#         | 11/110 [00:08<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2297 ||:  11%|#         | 12/110 [00:08<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2256 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2198 ||:  13%|#2        | 14/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2201 ||:  14%|#3        | 15/110 [00:10<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2229 ||:  15%|#4        | 16/110 [00:11<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2240 ||:  15%|#5        | 17/110 [00:12<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2252 ||:  16%|#6        | 18/110 [00:12<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2233 ||:  17%|#7        | 19/110 [00:13<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2186 ||:  18%|#8        | 20/110 [00:14<01:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2174 ||:  19%|#9        | 21/110 [00:14<01:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2193 ||:  20%|##        | 22/110 [00:15<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2181 ||:  21%|##        | 23/110 [00:16<01:05,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2188 ||:  22%|##1       | 24/110 [00:17<01:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2199 ||:  23%|##2       | 25/110 [00:18<01:04,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2190 ||:  24%|##3       | 26/110 [00:18<01:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2176 ||:  25%|##4       | 27/110 [00:19<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2150 ||:  25%|##5       | 28/110 [00:20<00:55,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2147 ||:  26%|##6       | 29/110 [00:20<00:57,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2158 ||:  27%|##7       | 30/110 [00:21<00:58,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2156 ||:  28%|##8       | 31/110 [00:22<00:58,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2170 ||:  29%|##9       | 32/110 [00:23<00:55,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2164 ||:  30%|###       | 33/110 [00:23<00:55,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2165 ||:  31%|###       | 34/110 [00:24<00:59,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2174 ||:  32%|###1      | 35/110 [00:25<00:57,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2181 ||:  33%|###2      | 36/110 [00:26<00:55,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2176 ||:  34%|###3      | 37/110 [00:26<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2160 ||:  35%|###4      | 38/110 [00:27<00:51,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2166 ||:  35%|###5      | 39/110 [00:28<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2156 ||:  36%|###6      | 40/110 [00:28<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2153 ||:  37%|###7      | 41/110 [00:29<00:51,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2168 ||:  38%|###8      | 42/110 [00:30<00:53,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2169 ||:  39%|###9      | 43/110 [00:31<00:50,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2167 ||:  40%|####      | 44/110 [00:31<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2178 ||:  41%|####      | 45/110 [00:32<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2169 ||:  42%|####1     | 46/110 [00:33<00:46,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2163 ||:  43%|####2     | 47/110 [00:33<00:44,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2159 ||:  44%|####3     | 48/110 [00:34<00:43,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2154 ||:  45%|####4     | 49/110 [00:35<00:43,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2149 ||:  45%|####5     | 50/110 [00:36<00:43,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2158 ||:  46%|####6     | 51/110 [00:36<00:43,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2149 ||:  47%|####7     | 52/110 [00:37<00:40,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2159 ||:  48%|####8     | 53/110 [00:38<00:40,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2155 ||:  49%|####9     | 54/110 [00:39<00:42,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2151 ||:  50%|#####     | 55/110 [00:39<00:40,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2148 ||:  51%|#####     | 56/110 [00:40<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2142 ||:  52%|#####1    | 57/110 [00:41<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2151 ||:  53%|#####2    | 58/110 [00:41<00:37,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2152 ||:  54%|#####3    | 59/110 [00:42<00:38,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2160 ||:  55%|#####4    | 60/110 [00:43<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2151 ||:  55%|#####5    | 61/110 [00:44<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2153 ||:  56%|#####6    | 62/110 [00:44<00:34,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2151 ||:  57%|#####7    | 63/110 [00:45<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2143 ||:  58%|#####8    | 64/110 [00:46<00:32,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2147 ||:  59%|#####9    | 65/110 [00:47<00:32,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2150 ||:  60%|######    | 66/110 [00:47<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2151 ||:  61%|######    | 67/110 [00:48<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2155 ||:  62%|######1   | 68/110 [00:49<00:29,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2159 ||:  63%|######2   | 69/110 [00:49<00:30,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2155 ||:  64%|######3   | 70/110 [00:51<00:39,  1.00it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2154 ||:  65%|######4   | 71/110 [00:52<00:35,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2158 ||:  65%|######5   | 72/110 [00:52<00:32,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2167 ||:  66%|######6   | 73/110 [00:53<00:30,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2163 ||:  67%|######7   | 74/110 [00:54<00:27,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2176 ||:  68%|######8   | 75/110 [00:55<00:26,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2168 ||:  69%|######9   | 76/110 [00:55<00:24,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2172 ||:  70%|#######   | 77/110 [00:56<00:23,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2175 ||:  71%|#######   | 78/110 [00:57<00:22,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2177 ||:  72%|#######1  | 79/110 [00:57<00:23,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2178 ||:  73%|#######2  | 80/110 [00:58<00:21,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2181 ||:  74%|#######3  | 81/110 [00:59<00:22,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2177 ||:  75%|#######4  | 82/110 [01:00<00:21,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2176 ||:  75%|#######5  | 83/110 [01:00<00:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2179 ||:  76%|#######6  | 84/110 [01:01<00:19,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2180 ||:  77%|#######7  | 85/110 [01:02<00:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2189 ||:  78%|#######8  | 86/110 [01:02<00:16,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2190 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2195 ||:  80%|########  | 88/110 [01:04<00:17,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2205 ||:  81%|########  | 89/110 [01:05<00:15,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2208 ||:  82%|########1 | 90/110 [01:06<00:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2210 ||:  83%|########2 | 91/110 [01:06<00:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2212 ||:  84%|########3 | 92/110 [01:07<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2217 ||:  85%|########4 | 93/110 [01:08<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2213 ||:  85%|########5 | 94/110 [01:08<00:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2221 ||:  86%|########6 | 95/110 [01:09<00:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2228 ||:  87%|########7 | 96/110 [01:10<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2229 ||:  88%|########8 | 97/110 [01:10<00:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2231 ||:  89%|########9 | 98/110 [01:11<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2227 ||:  90%|######### | 99/110 [01:12<00:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2234 ||:  91%|######### | 100/110 [01:12<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2230 ||:  92%|#########1| 101/110 [01:13<00:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2230 ||:  93%|#########2| 102/110 [01:14<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2229 ||:  94%|#########3| 103/110 [01:15<00:05,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2229 ||:  95%|#########4| 104/110 [01:16<00:04,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2229 ||:  95%|#########5| 105/110 [01:16<00:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2231 ||:  96%|#########6| 106/110 [01:17<00:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2233 ||:  97%|#########7| 107/110 [01:18<00:02,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2238 ||:  98%|#########8| 108/110 [01:19<00:01,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2243 ||:  99%|#########9| 109/110 [01:19<00:00,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2251 ||: 100%|##########| 110/110 [01:20<00:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2251 ||: 100%|##########| 110/110 [01:20<00:00,  1.37it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7055, acc: 0.7500, no_result: 0.0312, loss: 0.7290 ||:   4%|4         | 1/24 [00:00<00:09,  2.44it/s]
+BLEU: 0.6439, acc: 0.7344, no_result: 0.0469, loss: 0.6940 ||:   8%|8         | 2/24 [00:00<00:08,  2.49it/s]
+BLEU: 0.6060, acc: 0.6667, no_result: 0.1146, loss: 0.7982 ||:  12%|#2        | 3/24 [00:01<00:08,  2.47it/s]
+BLEU: 0.6442, acc: 0.6641, no_result: 0.1094, loss: 0.7938 ||:  17%|#6        | 4/24 [00:01<00:08,  2.42it/s]
+BLEU: 0.6800, acc: 0.7000, no_result: 0.0938, loss: 0.7597 ||:  21%|##        | 5/24 [00:02<00:07,  2.42it/s]
+BLEU: 0.6782, acc: 0.6667, no_result: 0.0938, loss: 0.7752 ||:  25%|##5       | 6/24 [00:02<00:07,  2.51it/s]
+BLEU: 0.6871, acc: 0.6652, no_result: 0.0893, loss: 0.7702 ||:  29%|##9       | 7/24 [00:02<00:06,  2.60it/s]
+BLEU: 0.6784, acc: 0.6719, no_result: 0.0898, loss: 0.7816 ||:  33%|###3      | 8/24 [00:03<00:06,  2.63it/s]
+BLEU: 0.6659, acc: 0.6597, no_result: 0.0938, loss: 0.7895 ||:  38%|###7      | 9/24 [00:03<00:05,  2.56it/s]
+BLEU: 0.6780, acc: 0.6531, no_result: 0.0938, loss: 0.7964 ||:  42%|####1     | 10/24 [00:03<00:05,  2.49it/s]
+BLEU: 0.6780, acc: 0.6420, no_result: 0.0938, loss: 0.8243 ||:  46%|####5     | 11/24 [00:04<00:05,  2.38it/s]
+BLEU: 0.6730, acc: 0.6458, no_result: 0.0859, loss: 0.8253 ||:  50%|#####     | 12/24 [00:04<00:04,  2.48it/s]
+BLEU: 0.6651, acc: 0.6418, no_result: 0.0913, loss: 0.8434 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.40it/s]
+BLEU: 0.6646, acc: 0.6272, no_result: 0.0982, loss: 0.8744 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.29it/s]
+BLEU: 0.6491, acc: 0.6062, no_result: 0.1187, loss: 0.9213 ||:  62%|######2   | 15/24 [00:06<00:04,  2.12it/s]
+BLEU: 0.6420, acc: 0.5996, no_result: 0.1270, loss: 0.9469 ||:  67%|######6   | 16/24 [00:06<00:03,  2.06it/s]
+BLEU: 0.6416, acc: 0.5938, no_result: 0.1305, loss: 0.9439 ||:  71%|#######   | 17/24 [00:07<00:03,  2.14it/s]
+BLEU: 0.6388, acc: 0.5955, no_result: 0.1354, loss: 0.9495 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.13it/s]
+BLEU: 0.6448, acc: 0.6036, no_result: 0.1316, loss: 0.9313 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.22it/s]
+BLEU: 0.6449, acc: 0.6047, no_result: 0.1281, loss: 0.9254 ||:  83%|########3 | 20/24 [00:08<00:01,  2.34it/s]
+BLEU: 0.6387, acc: 0.5967, no_result: 0.1265, loss: 0.9280 ||:  88%|########7 | 21/24 [00:08<00:01,  2.33it/s]
+BLEU: 0.6417, acc: 0.5852, no_result: 0.1250, loss: 0.9302 ||:  92%|#########1| 22/24 [00:09<00:00,  2.29it/s]
+BLEU: 0.6458, acc: 0.5910, no_result: 0.1223, loss: 0.9140 ||:  96%|#########5| 23/24 [00:09<00:00,  2.38it/s]
+BLEU: 0.6455, acc: 0.5942, no_result: 0.1172, loss: 0.9150 ||: 100%|##########| 24/24 [00:09<00:00,  2.92it/s]
+BLEU: 0.6455, acc: 0.5942, no_result: 0.1172, loss: 0.9150 ||: 100%|##########| 24/24 [00:09<00:00,  2.42it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2160 ||:   1%|          | 1/110 [00:00<01:23,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1915 ||:   2%|1         | 2/110 [00:01<01:20,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1999 ||:   3%|2         | 3/110 [00:02<01:20,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1998 ||:   4%|3         | 4/110 [00:03<01:20,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1958 ||:   5%|4         | 5/110 [00:03<01:22,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1868 ||:   5%|5         | 6/110 [00:04<01:18,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2003 ||:   6%|6         | 7/110 [00:05<01:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1948 ||:   7%|7         | 8/110 [00:05<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1949 ||:   8%|8         | 9/110 [00:06<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1993 ||:   9%|9         | 10/110 [00:07<01:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2025 ||:  10%|#         | 11/110 [00:07<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2109 ||:  11%|#         | 12/110 [00:08<01:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2072 ||:  12%|#1        | 13/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2094 ||:  13%|#2        | 14/110 [00:10<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2050 ||:  14%|#3        | 15/110 [00:10<01:10,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2039 ||:  15%|#4        | 16/110 [00:11<01:09,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2105 ||:  15%|#5        | 17/110 [00:12<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2122 ||:  16%|#6        | 18/110 [00:13<01:11,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2091 ||:  17%|#7        | 19/110 [00:13<01:08,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2100 ||:  18%|#8        | 20/110 [00:14<01:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2081 ||:  19%|#9        | 21/110 [00:15<01:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2080 ||:  20%|##        | 22/110 [00:16<01:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2070 ||:  21%|##        | 23/110 [00:16<01:04,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2062 ||:  22%|##1       | 24/110 [00:17<01:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2053 ||:  23%|##2       | 25/110 [00:18<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2056 ||:  24%|##3       | 26/110 [00:18<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2026 ||:  25%|##4       | 27/110 [00:19<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2045 ||:  25%|##5       | 28/110 [00:20<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2032 ||:  26%|##6       | 29/110 [00:21<00:57,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2019 ||:  27%|##7       | 30/110 [00:22<01:03,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2007 ||:  28%|##8       | 31/110 [00:22<01:01,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2000 ||:  29%|##9       | 32/110 [00:23<00:58,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2004 ||:  30%|###       | 33/110 [00:24<01:01,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2003 ||:  31%|###       | 34/110 [00:25<00:57,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1995 ||:  32%|###1      | 35/110 [00:25<00:55,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2012 ||:  33%|###2      | 36/110 [00:26<00:53,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2000 ||:  34%|###3      | 37/110 [00:27<00:52,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1994 ||:  35%|###4      | 38/110 [00:27<00:52,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1987 ||:  35%|###5      | 39/110 [00:28<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1988 ||:  36%|###6      | 40/110 [00:29<00:51,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1985 ||:  37%|###7      | 41/110 [00:30<00:51,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1992 ||:  38%|###8      | 42/110 [00:30<00:49,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2011 ||:  39%|###9      | 43/110 [00:31<00:50,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2028 ||:  40%|####      | 44/110 [00:32<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2018 ||:  41%|####      | 45/110 [00:32<00:46,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2019 ||:  42%|####1     | 46/110 [00:33<00:47,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2014 ||:  43%|####2     | 47/110 [00:34<00:48,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2000 ||:  44%|####3     | 48/110 [00:35<00:48,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2005 ||:  45%|####4     | 49/110 [00:36<00:46,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2012 ||:  45%|####5     | 50/110 [00:36<00:43,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2001 ||:  46%|####6     | 51/110 [00:37<00:44,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2009 ||:  47%|####7     | 52/110 [00:38<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2008 ||:  48%|####8     | 53/110 [00:38<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2001 ||:  49%|####9     | 54/110 [00:39<00:38,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1998 ||:  50%|#####     | 55/110 [00:40<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2004 ||:  51%|#####     | 56/110 [00:40<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2003 ||:  52%|#####1    | 57/110 [00:41<00:38,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1996 ||:  53%|#####2    | 58/110 [00:42<00:36,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2000 ||:  54%|#####3    | 59/110 [00:43<00:37,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2009 ||:  55%|#####4    | 60/110 [00:44<00:48,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2025 ||:  55%|#####5    | 61/110 [00:45<00:45,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2024 ||:  56%|#####6    | 62/110 [00:46<00:41,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2026 ||:  57%|#####7    | 63/110 [00:46<00:37,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2035 ||:  58%|#####8    | 64/110 [00:47<00:36,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2033 ||:  59%|#####9    | 65/110 [00:48<00:34,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2039 ||:  60%|######    | 66/110 [00:49<00:33,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2043 ||:  61%|######    | 67/110 [00:49<00:31,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2041 ||:  62%|######1   | 68/110 [00:50<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2037 ||:  63%|######2   | 69/110 [00:51<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2026 ||:  64%|######3   | 70/110 [00:51<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2030 ||:  65%|######4   | 71/110 [00:52<00:27,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2034 ||:  65%|######5   | 72/110 [00:53<00:26,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2038 ||:  66%|######6   | 73/110 [00:53<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2047 ||:  67%|######7   | 74/110 [00:54<00:25,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2051 ||:  68%|######8   | 75/110 [00:55<00:24,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2050 ||:  69%|######9   | 76/110 [00:56<00:23,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2052 ||:  70%|#######   | 77/110 [00:56<00:23,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2056 ||:  71%|#######   | 78/110 [00:57<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2050 ||:  72%|#######1  | 79/110 [00:58<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2064 ||:  73%|#######2  | 80/110 [00:58<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2062 ||:  74%|#######3  | 81/110 [00:59<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2068 ||:  75%|#######4  | 82/110 [01:00<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2072 ||:  75%|#######5  | 83/110 [01:00<00:18,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2070 ||:  76%|#######6  | 84/110 [01:01<00:18,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2073 ||:  77%|#######7  | 85/110 [01:02<00:17,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2077 ||:  78%|#######8  | 86/110 [01:03<00:16,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2078 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2086 ||:  80%|########  | 88/110 [01:04<00:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2092 ||:  81%|########  | 89/110 [01:05<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2098 ||:  82%|########1 | 90/110 [01:05<00:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2097 ||:  83%|########2 | 91/110 [01:06<00:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2090 ||:  84%|########3 | 92/110 [01:07<00:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2087 ||:  85%|########4 | 93/110 [01:08<00:12,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2094 ||:  85%|########5 | 94/110 [01:08<00:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2094 ||:  86%|########6 | 95/110 [01:09<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2086 ||:  87%|########7 | 96/110 [01:10<00:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2097 ||:  88%|########8 | 97/110 [01:11<00:09,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2104 ||:  89%|########9 | 98/110 [01:11<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2098 ||:  90%|######### | 99/110 [01:12<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2096 ||:  91%|######### | 100/110 [01:13<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2092 ||:  92%|#########1| 101/110 [01:13<00:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2093 ||:  93%|#########2| 102/110 [01:14<00:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2091 ||:  94%|#########3| 103/110 [01:15<00:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2085 ||:  95%|#########4| 104/110 [01:16<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2088 ||:  95%|#########5| 105/110 [01:16<00:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2088 ||:  96%|#########6| 106/110 [01:17<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2084 ||:  97%|#########7| 107/110 [01:18<00:02,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2085 ||:  98%|#########8| 108/110 [01:19<00:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2082 ||:  99%|#########9| 109/110 [01:19<00:00,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2083 ||: 100%|##########| 110/110 [01:20<00:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2083 ||: 100%|##########| 110/110 [01:20<00:00,  1.37it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7622, acc: 0.6562, no_result: 0.0938, loss: 0.6791 ||:   4%|4         | 1/24 [00:00<00:08,  2.79it/s]
+BLEU: 0.7134, acc: 0.6719, no_result: 0.0781, loss: 0.6701 ||:   8%|8         | 2/24 [00:00<00:07,  2.86it/s]
+BLEU: 0.6770, acc: 0.6562, no_result: 0.1146, loss: 0.8022 ||:  12%|#2        | 3/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.6933, acc: 0.6328, no_result: 0.1172, loss: 0.7857 ||:  17%|#6        | 4/24 [00:01<00:07,  2.76it/s]
+BLEU: 0.7133, acc: 0.6500, no_result: 0.1125, loss: 0.7611 ||:  21%|##        | 5/24 [00:01<00:06,  2.73it/s]
+BLEU: 0.6977, acc: 0.6146, no_result: 0.1094, loss: 0.7834 ||:  25%|##5       | 6/24 [00:02<00:06,  2.85it/s]
+BLEU: 0.7089, acc: 0.6384, no_result: 0.1027, loss: 0.7709 ||:  29%|##9       | 7/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7086, acc: 0.6523, no_result: 0.1016, loss: 0.7854 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.6983, acc: 0.6458, no_result: 0.1007, loss: 0.7789 ||:  38%|###7      | 9/24 [00:03<00:05,  2.94it/s]
+BLEU: 0.7070, acc: 0.6406, no_result: 0.0906, loss: 0.7876 ||:  42%|####1     | 10/24 [00:03<00:04,  2.89it/s]
+BLEU: 0.7044, acc: 0.6193, no_result: 0.1023, loss: 0.8246 ||:  46%|####5     | 11/24 [00:03<00:04,  2.74it/s]
+BLEU: 0.6992, acc: 0.6172, no_result: 0.0938, loss: 0.8248 ||:  50%|#####     | 12/24 [00:04<00:04,  2.86it/s]
+BLEU: 0.6919, acc: 0.6178, no_result: 0.0938, loss: 0.8383 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.74it/s]
+BLEU: 0.6923, acc: 0.6027, no_result: 0.1027, loss: 0.8697 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.6726, acc: 0.5813, no_result: 0.1208, loss: 0.9299 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.6672, acc: 0.5801, no_result: 0.1250, loss: 0.9510 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.6671, acc: 0.5790, no_result: 0.1305, loss: 0.9465 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.6676, acc: 0.5816, no_result: 0.1302, loss: 0.9561 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.6720, acc: 0.5888, no_result: 0.1266, loss: 0.9384 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.56it/s]
+BLEU: 0.6718, acc: 0.5906, no_result: 0.1234, loss: 0.9329 ||:  83%|########3 | 20/24 [00:07<00:01,  2.70it/s]
+BLEU: 0.6681, acc: 0.5848, no_result: 0.1205, loss: 0.9348 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6726, acc: 0.5852, no_result: 0.1165, loss: 0.9330 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.6770, acc: 0.5938, no_result: 0.1141, loss: 0.9167 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.6765, acc: 0.5922, no_result: 0.1140, loss: 0.9230 ||: 100%|##########| 24/24 [00:08<00:00,  3.39it/s]
+BLEU: 0.6765, acc: 0.5922, no_result: 0.1140, loss: 0.9230 ||: 100%|##########| 24/24 [00:08<00:00,  2.78it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2037 ||:   1%|          | 1/110 [00:00<01:10,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1776 ||:   2%|1         | 2/110 [00:01<01:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1629 ||:   3%|2         | 3/110 [00:01<01:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1805 ||:   4%|3         | 4/110 [00:02<01:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1743 ||:   5%|4         | 5/110 [00:03<01:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1730 ||:   5%|5         | 6/110 [00:03<01:02,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1769 ||:   6%|6         | 7/110 [00:04<01:01,  1.67it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1887 ||:   7%|7         | 8/110 [00:05<01:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1906 ||:   8%|8         | 9/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1857 ||:   9%|9         | 10/110 [00:06<01:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1837 ||:  10%|#         | 11/110 [00:06<01:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1941 ||:  11%|#         | 12/110 [00:07<01:02,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1931 ||:  12%|#1        | 13/110 [00:08<00:59,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1944 ||:  13%|#2        | 14/110 [00:08<00:57,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1918 ||:  14%|#3        | 15/110 [00:09<00:58,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1896 ||:  15%|#4        | 16/110 [00:10<00:57,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1871 ||:  15%|#5        | 17/110 [00:10<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1885 ||:  16%|#6        | 18/110 [00:11<00:58,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1893 ||:  17%|#7        | 19/110 [00:11<00:58,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1897 ||:  18%|#8        | 20/110 [00:12<00:58,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1930 ||:  19%|#9        | 21/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1955 ||:  20%|##        | 22/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1963 ||:  21%|##        | 23/110 [00:14<01:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1960 ||:  22%|##1       | 24/110 [00:15<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1948 ||:  23%|##2       | 25/110 [00:16<00:59,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1930 ||:  24%|##3       | 26/110 [00:16<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1947 ||:  25%|##4       | 27/110 [00:17<00:55,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1937 ||:  25%|##5       | 28/110 [00:18<00:53,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1922 ||:  26%|##6       | 29/110 [00:18<00:51,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1922 ||:  27%|##7       | 30/110 [00:19<00:54,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1923 ||:  28%|##8       | 31/110 [00:20<00:55,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1933 ||:  29%|##9       | 32/110 [00:21<00:57,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1923 ||:  30%|###       | 33/110 [00:21<00:57,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1941 ||:  31%|###       | 34/110 [00:22<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1936 ||:  32%|###1      | 35/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1939 ||:  33%|###2      | 36/110 [00:23<00:51,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1947 ||:  34%|###3      | 37/110 [00:24<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1954 ||:  35%|###4      | 38/110 [00:25<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1974 ||:  35%|###5      | 39/110 [00:25<00:48,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1973 ||:  36%|###6      | 40/110 [00:26<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1967 ||:  37%|###7      | 41/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1969 ||:  38%|###8      | 42/110 [00:27<00:45,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1965 ||:  39%|###9      | 43/110 [00:28<00:42,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1972 ||:  40%|####      | 44/110 [00:29<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1967 ||:  41%|####      | 45/110 [00:29<00:41,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1956 ||:  42%|####1     | 46/110 [00:30<00:40,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1961 ||:  43%|####2     | 47/110 [00:31<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1969 ||:  44%|####3     | 48/110 [00:31<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1966 ||:  45%|####4     | 49/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1966 ||:  45%|####5     | 50/110 [00:33<00:54,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1963 ||:  46%|####6     | 51/110 [00:34<00:53,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1956 ||:  47%|####7     | 52/110 [00:35<00:50,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1961 ||:  48%|####8     | 53/110 [00:36<00:45,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1967 ||:  49%|####9     | 54/110 [00:36<00:41,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1961 ||:  50%|#####     | 55/110 [00:37<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1958 ||:  51%|#####     | 56/110 [00:38<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1950 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1963 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1960 ||:  54%|#####3    | 59/110 [00:40<00:35,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1953 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1958 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1961 ||:  56%|#####6    | 62/110 [00:41<00:31,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1965 ||:  57%|#####7    | 63/110 [00:42<00:29,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1964 ||:  58%|#####8    | 64/110 [00:43<00:29,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1958 ||:  59%|#####9    | 65/110 [00:43<00:29,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1951 ||:  60%|######    | 66/110 [00:44<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1952 ||:  61%|######    | 67/110 [00:45<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1949 ||:  62%|######1   | 68/110 [00:46<00:28,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1952 ||:  63%|######2   | 69/110 [00:46<00:26,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1949 ||:  64%|######3   | 70/110 [00:47<00:26,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1957 ||:  65%|######4   | 71/110 [00:47<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1948 ||:  65%|######5   | 72/110 [00:48<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1950 ||:  66%|######6   | 73/110 [00:49<00:23,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1955 ||:  67%|######7   | 74/110 [00:49<00:23,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1952 ||:  68%|######8   | 75/110 [00:50<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1954 ||:  69%|######9   | 76/110 [00:51<00:21,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1967 ||:  70%|#######   | 77/110 [00:51<00:21,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1969 ||:  71%|#######   | 78/110 [00:52<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1969 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1966 ||:  73%|#######2  | 80/110 [00:53<00:19,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1965 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1966 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1972 ||:  75%|#######5  | 83/110 [00:55<00:18,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1972 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1969 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1960 ||:  78%|#######8  | 86/110 [00:57<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1968 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1969 ||:  80%|########  | 88/110 [00:59<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1973 ||:  81%|########  | 89/110 [00:59<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1972 ||:  82%|########1 | 90/110 [01:00<00:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1974 ||:  83%|########2 | 91/110 [01:01<00:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1975 ||:  84%|########3 | 92/110 [01:02<00:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1979 ||:  85%|########4 | 93/110 [01:02<00:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1982 ||:  85%|########5 | 94/110 [01:03<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1985 ||:  86%|########6 | 95/110 [01:04<00:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1987 ||:  87%|########7 | 96/110 [01:04<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1992 ||:  88%|########8 | 97/110 [01:05<00:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1989 ||:  89%|########9 | 98/110 [01:06<00:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1992 ||:  90%|######### | 99/110 [01:06<00:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1990 ||:  91%|######### | 100/110 [01:07<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1992 ||:  92%|#########1| 101/110 [01:08<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1997 ||:  93%|#########2| 102/110 [01:08<00:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1993 ||:  94%|#########3| 103/110 [01:09<00:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1990 ||:  95%|#########4| 104/110 [01:10<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1989 ||:  95%|#########5| 105/110 [01:10<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1989 ||:  96%|#########6| 106/110 [01:11<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1994 ||:  97%|#########7| 107/110 [01:12<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1999 ||:  98%|#########8| 108/110 [01:12<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2000 ||:  99%|#########9| 109/110 [01:13<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1997 ||: 100%|##########| 110/110 [01:13<00:00,  1.71it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1997 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7499, acc: 0.6562, no_result: 0.0312, loss: 0.7757 ||:   4%|4         | 1/24 [00:00<00:08,  2.64it/s]
+BLEU: 0.6595, acc: 0.6875, no_result: 0.0469, loss: 0.7798 ||:   8%|8         | 2/24 [00:00<00:08,  2.74it/s]
+BLEU: 0.6371, acc: 0.6667, no_result: 0.0938, loss: 0.8807 ||:  12%|#2        | 3/24 [00:01<00:07,  2.76it/s]
+BLEU: 0.6648, acc: 0.6719, no_result: 0.0859, loss: 0.8430 ||:  17%|#6        | 4/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.6907, acc: 0.6875, no_result: 0.0875, loss: 0.8156 ||:  21%|##        | 5/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.6905, acc: 0.6615, no_result: 0.0833, loss: 0.8364 ||:  25%|##5       | 6/24 [00:02<00:06,  2.77it/s]
+BLEU: 0.6970, acc: 0.6696, no_result: 0.0804, loss: 0.8278 ||:  29%|##9       | 7/24 [00:02<00:05,  2.85it/s]
+BLEU: 0.6946, acc: 0.6758, no_result: 0.0820, loss: 0.8342 ||:  33%|###3      | 8/24 [00:02<00:05,  2.75it/s]
+BLEU: 0.6862, acc: 0.6771, no_result: 0.0799, loss: 0.8296 ||:  38%|###7      | 9/24 [00:03<00:05,  2.63it/s]
+BLEU: 0.6946, acc: 0.6687, no_result: 0.0750, loss: 0.8375 ||:  42%|####1     | 10/24 [00:03<00:05,  2.55it/s]
+BLEU: 0.6969, acc: 0.6562, no_result: 0.0795, loss: 0.8687 ||:  46%|####5     | 11/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.6940, acc: 0.6562, no_result: 0.0729, loss: 0.8661 ||:  50%|#####     | 12/24 [00:04<00:04,  2.49it/s]
+BLEU: 0.6839, acc: 0.6490, no_result: 0.0793, loss: 0.8865 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.37it/s]
+BLEU: 0.6860, acc: 0.6339, no_result: 0.0759, loss: 0.9199 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.27it/s]
+BLEU: 0.6663, acc: 0.6208, no_result: 0.0938, loss: 0.9720 ||:  62%|######2   | 15/24 [00:06<00:04,  2.09it/s]
+BLEU: 0.6577, acc: 0.6133, no_result: 0.1016, loss: 0.9926 ||:  67%|######6   | 16/24 [00:06<00:03,  2.02it/s]
+BLEU: 0.6566, acc: 0.6085, no_result: 0.1103, loss: 0.9871 ||:  71%|#######   | 17/24 [00:07<00:03,  2.09it/s]
+BLEU: 0.6560, acc: 0.6059, no_result: 0.1146, loss: 0.9960 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.09it/s]
+BLEU: 0.6604, acc: 0.6151, no_result: 0.1118, loss: 0.9744 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.20it/s]
+BLEU: 0.6638, acc: 0.6188, no_result: 0.1094, loss: 0.9693 ||:  83%|########3 | 20/24 [00:08<00:01,  2.32it/s]
+BLEU: 0.6599, acc: 0.6146, no_result: 0.1057, loss: 0.9732 ||:  88%|########7 | 21/24 [00:08<00:01,  2.32it/s]
+BLEU: 0.6624, acc: 0.6080, no_result: 0.1023, loss: 0.9703 ||:  92%|#########1| 22/24 [00:09<00:00,  2.30it/s]
+BLEU: 0.6659, acc: 0.6155, no_result: 0.1005, loss: 0.9526 ||:  96%|#########5| 23/24 [00:09<00:00,  2.38it/s]
+BLEU: 0.6651, acc: 0.6176, no_result: 0.0964, loss: 0.9562 ||: 100%|##########| 24/24 [00:09<00:00,  2.91it/s]
+BLEU: 0.6651, acc: 0.6176, no_result: 0.0964, loss: 0.9562 ||: 100%|##########| 24/24 [00:09<00:00,  2.46it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1406 ||:   1%|          | 1/110 [00:00<01:43,  1.05it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1717 ||:   2%|1         | 2/110 [00:01<01:24,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1663 ||:   3%|2         | 3/110 [00:02<01:18,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1553 ||:   4%|3         | 4/110 [00:02<01:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1629 ||:   5%|4         | 5/110 [00:03<01:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1655 ||:   5%|5         | 6/110 [00:04<01:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1704 ||:   6%|6         | 7/110 [00:05<01:15,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1696 ||:   7%|7         | 8/110 [00:05<01:17,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1767 ||:   8%|8         | 9/110 [00:06<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1707 ||:   9%|9         | 10/110 [00:07<01:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1769 ||:  10%|#         | 11/110 [00:08<01:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1738 ||:  11%|#         | 12/110 [00:08<01:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1705 ||:  12%|#1        | 13/110 [00:09<01:12,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1716 ||:  13%|#2        | 14/110 [00:10<01:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1687 ||:  14%|#3        | 15/110 [00:10<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1647 ||:  15%|#4        | 16/110 [00:11<01:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1671 ||:  15%|#5        | 17/110 [00:12<01:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1726 ||:  16%|#6        | 18/110 [00:13<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1755 ||:  17%|#7        | 19/110 [00:13<01:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  18%|#8        | 20/110 [00:14<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1791 ||:  19%|#9        | 21/110 [00:15<01:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1795 ||:  20%|##        | 22/110 [00:16<01:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1786 ||:  21%|##        | 23/110 [00:16<01:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1782 ||:  22%|##1       | 24/110 [00:17<01:01,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  23%|##2       | 25/110 [00:18<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1779 ||:  24%|##3       | 26/110 [00:18<00:59,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  25%|##4       | 27/110 [00:19<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1777 ||:  25%|##5       | 28/110 [00:20<00:58,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1795 ||:  26%|##6       | 29/110 [00:21<01:00,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1794 ||:  27%|##7       | 30/110 [00:21<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1802 ||:  28%|##8       | 31/110 [00:22<00:57,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1781 ||:  29%|##9       | 32/110 [00:23<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1820 ||:  30%|###       | 33/110 [00:23<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1838 ||:  31%|###       | 34/110 [00:24<00:51,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1854 ||:  32%|###1      | 35/110 [00:25<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1855 ||:  33%|###2      | 36/110 [00:25<00:51,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1858 ||:  34%|###3      | 37/110 [00:26<00:49,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1885 ||:  35%|###4      | 38/110 [00:27<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1889 ||:  35%|###5      | 39/110 [00:28<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1897 ||:  36%|###6      | 40/110 [00:29<01:06,  1.05it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1893 ||:  37%|###7      | 41/110 [00:30<01:01,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1905 ||:  38%|###8      | 42/110 [00:31<00:56,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1920 ||:  39%|###9      | 43/110 [00:31<00:55,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1919 ||:  40%|####      | 44/110 [00:32<00:52,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1933 ||:  41%|####      | 45/110 [00:33<00:49,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1934 ||:  42%|####1     | 46/110 [00:33<00:47,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1935 ||:  43%|####2     | 47/110 [00:34<00:47,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1926 ||:  44%|####3     | 48/110 [00:35<00:46,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1922 ||:  45%|####4     | 49/110 [00:36<00:45,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  45%|####5     | 50/110 [00:36<00:44,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1912 ||:  46%|####6     | 51/110 [00:37<00:43,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1915 ||:  47%|####7     | 52/110 [00:38<00:41,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1917 ||:  48%|####8     | 53/110 [00:39<00:40,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  49%|####9     | 54/110 [00:39<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1899 ||:  50%|#####     | 55/110 [00:40<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  51%|#####     | 56/110 [00:41<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  52%|#####1    | 57/110 [00:41<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1912 ||:  53%|#####2    | 58/110 [00:42<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1910 ||:  54%|#####3    | 59/110 [00:43<00:35,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1910 ||:  55%|#####4    | 60/110 [00:43<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1917 ||:  55%|#####5    | 61/110 [00:44<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  56%|#####6    | 62/110 [00:45<00:34,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1916 ||:  57%|#####7    | 63/110 [00:46<00:36,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1929 ||:  58%|#####8    | 64/110 [00:47<00:34,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1936 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1938 ||:  60%|######    | 66/110 [00:48<00:32,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1937 ||:  61%|######    | 67/110 [00:49<00:32,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1939 ||:  62%|######1   | 68/110 [00:49<00:30,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1936 ||:  63%|######2   | 69/110 [00:50<00:30,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1928 ||:  64%|######3   | 70/110 [00:51<00:29,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1935 ||:  65%|######4   | 71/110 [00:52<00:29,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1934 ||:  65%|######5   | 72/110 [00:52<00:27,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1932 ||:  66%|######6   | 73/110 [00:53<00:28,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1934 ||:  67%|######7   | 74/110 [00:54<00:29,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1935 ||:  68%|######8   | 75/110 [00:55<00:28,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1932 ||:  69%|######9   | 76/110 [00:56<00:27,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1931 ||:  70%|#######   | 77/110 [00:57<00:26,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1929 ||:  71%|#######   | 78/110 [00:57<00:24,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1922 ||:  72%|#######1  | 79/110 [00:58<00:22,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1920 ||:  73%|#######2  | 80/110 [00:59<00:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1916 ||:  74%|#######3  | 81/110 [01:00<00:22,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1915 ||:  75%|#######4  | 82/110 [01:00<00:21,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1915 ||:  75%|#######5  | 83/110 [01:01<00:20,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1907 ||:  76%|#######6  | 84/110 [01:02<00:19,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1911 ||:  77%|#######7  | 85/110 [01:02<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1906 ||:  78%|#######8  | 86/110 [01:03<00:17,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1905 ||:  79%|#######9  | 87/110 [01:04<00:17,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  80%|########  | 88/110 [01:05<00:16,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1911 ||:  81%|########  | 89/110 [01:05<00:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1910 ||:  82%|########1 | 90/110 [01:06<00:14,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1905 ||:  83%|########2 | 91/110 [01:07<00:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1910 ||:  84%|########3 | 92/110 [01:07<00:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1913 ||:  85%|########4 | 93/110 [01:08<00:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1917 ||:  85%|########5 | 94/110 [01:09<00:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1916 ||:  86%|########6 | 95/110 [01:10<00:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1916 ||:  87%|########7 | 96/110 [01:11<00:10,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1914 ||:  88%|########8 | 97/110 [01:11<00:09,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1912 ||:  89%|########9 | 98/110 [01:12<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1910 ||:  90%|######### | 99/110 [01:13<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1907 ||:  91%|######### | 100/110 [01:13<00:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1913 ||:  92%|#########1| 101/110 [01:14<00:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1913 ||:  93%|#########2| 102/110 [01:15<00:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1918 ||:  94%|#########3| 103/110 [01:16<00:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1923 ||:  95%|#########4| 104/110 [01:16<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1925 ||:  95%|#########5| 105/110 [01:17<00:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1924 ||:  96%|#########6| 106/110 [01:18<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1919 ||:  97%|#########7| 107/110 [01:18<00:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1918 ||:  98%|#########8| 108/110 [01:19<00:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1921 ||:  99%|#########9| 109/110 [01:20<00:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1921 ||: 100%|##########| 110/110 [01:20<00:00,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1921 ||: 100%|##########| 110/110 [01:20<00:00,  1.36it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7096, acc: 0.5938, no_result: 0.0938, loss: 0.7567 ||:   4%|4         | 1/24 [00:00<00:09,  2.36it/s]
+BLEU: 0.6675, acc: 0.6562, no_result: 0.0938, loss: 0.7330 ||:   8%|8         | 2/24 [00:00<00:09,  2.40it/s]
+BLEU: 0.6493, acc: 0.6458, no_result: 0.1250, loss: 0.8534 ||:  12%|#2        | 3/24 [00:01<00:08,  2.42it/s]
+BLEU: 0.6778, acc: 0.6641, no_result: 0.1328, loss: 0.8351 ||:  17%|#6        | 4/24 [00:01<00:08,  2.37it/s]
+BLEU: 0.7086, acc: 0.6875, no_result: 0.1313, loss: 0.8024 ||:  21%|##        | 5/24 [00:02<00:08,  2.34it/s]
+BLEU: 0.6982, acc: 0.6562, no_result: 0.1198, loss: 0.8250 ||:  25%|##5       | 6/24 [00:02<00:07,  2.46it/s]
+BLEU: 0.7007, acc: 0.6741, no_result: 0.1116, loss: 0.8107 ||:  29%|##9       | 7/24 [00:02<00:06,  2.56it/s]
+BLEU: 0.6987, acc: 0.6875, no_result: 0.1133, loss: 0.8244 ||:  33%|###3      | 8/24 [00:03<00:06,  2.57it/s]
+BLEU: 0.6904, acc: 0.6875, no_result: 0.1076, loss: 0.8097 ||:  38%|###7      | 9/24 [00:03<00:05,  2.55it/s]
+BLEU: 0.7006, acc: 0.6781, no_result: 0.1000, loss: 0.8267 ||:  42%|####1     | 10/24 [00:04<00:05,  2.50it/s]
+BLEU: 0.7001, acc: 0.6619, no_result: 0.0966, loss: 0.8671 ||:  46%|####5     | 11/24 [00:04<00:05,  2.39it/s]
+BLEU: 0.6940, acc: 0.6589, no_result: 0.0885, loss: 0.8673 ||:  50%|#####     | 12/24 [00:04<00:04,  2.45it/s]
+BLEU: 0.6895, acc: 0.6562, no_result: 0.0938, loss: 0.8890 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.37it/s]
+BLEU: 0.6924, acc: 0.6406, no_result: 0.0960, loss: 0.9191 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.27it/s]
+BLEU: 0.6745, acc: 0.6188, no_result: 0.1167, loss: 0.9766 ||:  62%|######2   | 15/24 [00:06<00:04,  2.09it/s]
+BLEU: 0.6687, acc: 0.6113, no_result: 0.1211, loss: 0.9981 ||:  67%|######6   | 16/24 [00:06<00:03,  2.05it/s]
+BLEU: 0.6686, acc: 0.6066, no_result: 0.1250, loss: 0.9865 ||:  71%|#######   | 17/24 [00:07<00:03,  2.11it/s]
+BLEU: 0.6668, acc: 0.6042, no_result: 0.1250, loss: 0.9961 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.12it/s]
+BLEU: 0.6707, acc: 0.6135, no_result: 0.1201, loss: 0.9761 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.21it/s]
+BLEU: 0.6727, acc: 0.6188, no_result: 0.1172, loss: 0.9657 ||:  83%|########3 | 20/24 [00:08<00:01,  2.33it/s]
+BLEU: 0.6658, acc: 0.6146, no_result: 0.1146, loss: 0.9722 ||:  88%|########7 | 21/24 [00:09<00:01,  2.33it/s]
+BLEU: 0.6695, acc: 0.6094, no_result: 0.1151, loss: 0.9669 ||:  92%|#########1| 22/24 [00:09<00:00,  2.30it/s]
+BLEU: 0.6732, acc: 0.6196, no_result: 0.1114, loss: 0.9495 ||:  96%|#########5| 23/24 [00:09<00:00,  2.40it/s]
+BLEU: 0.6726, acc: 0.6215, no_result: 0.1068, loss: 0.9551 ||: 100%|##########| 24/24 [00:10<00:00,  2.94it/s]
+BLEU: 0.6726, acc: 0.6215, no_result: 0.1068, loss: 0.9551 ||: 100%|##########| 24/24 [00:10<00:00,  2.40it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2159 ||:   1%|          | 1/110 [00:00<01:25,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2071 ||:   2%|1         | 2/110 [00:01<01:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1852 ||:   3%|2         | 3/110 [00:02<01:18,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1803 ||:   4%|3         | 4/110 [00:02<01:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1745 ||:   5%|4         | 5/110 [00:03<01:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1752 ||:   5%|5         | 6/110 [00:04<01:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1684 ||:   6%|6         | 7/110 [00:04<01:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1676 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1653 ||:   8%|8         | 9/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1638 ||:   9%|9         | 10/110 [00:06<01:04,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1648 ||:  10%|#         | 11/110 [00:07<01:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1608 ||:  11%|#         | 12/110 [00:08<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1645 ||:  12%|#1        | 13/110 [00:08<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1654 ||:  13%|#2        | 14/110 [00:09<01:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1654 ||:  14%|#3        | 15/110 [00:10<01:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1626 ||:  15%|#4        | 16/110 [00:10<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1621 ||:  15%|#5        | 17/110 [00:11<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1640 ||:  16%|#6        | 18/110 [00:12<01:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1614 ||:  17%|#7        | 19/110 [00:13<01:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1596 ||:  18%|#8        | 20/110 [00:13<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1592 ||:  19%|#9        | 21/110 [00:14<01:04,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1560 ||:  20%|##        | 22/110 [00:15<01:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1555 ||:  21%|##        | 23/110 [00:15<01:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1523 ||:  22%|##1       | 24/110 [00:16<01:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1499 ||:  23%|##2       | 25/110 [00:17<01:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1544 ||:  24%|##3       | 26/110 [00:17<00:57,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1540 ||:  25%|##4       | 27/110 [00:18<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1552 ||:  25%|##5       | 28/110 [00:19<00:54,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1577 ||:  26%|##6       | 29/110 [00:20<00:55,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1573 ||:  27%|##7       | 30/110 [00:21<01:22,  1.03s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.1573 ||:  28%|##8       | 31/110 [00:22<01:12,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1561 ||:  29%|##9       | 32/110 [00:23<01:07,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1572 ||:  30%|###       | 33/110 [00:23<01:03,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1586 ||:  31%|###       | 34/110 [00:24<01:02,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1582 ||:  32%|###1      | 35/110 [00:25<00:57,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1603 ||:  33%|###2      | 36/110 [00:26<00:54,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1600 ||:  34%|###3      | 37/110 [00:26<00:54,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1613 ||:  35%|###4      | 38/110 [00:27<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1612 ||:  35%|###5      | 39/110 [00:28<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1619 ||:  36%|###6      | 40/110 [00:28<00:49,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1626 ||:  37%|###7      | 41/110 [00:29<00:48,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1611 ||:  38%|###8      | 42/110 [00:30<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1611 ||:  39%|###9      | 43/110 [00:30<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1599 ||:  40%|####      | 44/110 [00:31<00:45,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1595 ||:  41%|####      | 45/110 [00:32<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1600 ||:  42%|####1     | 46/110 [00:32<00:44,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1604 ||:  43%|####2     | 47/110 [00:33<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1598 ||:  44%|####3     | 48/110 [00:34<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1609 ||:  45%|####4     | 49/110 [00:35<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1605 ||:  45%|####5     | 50/110 [00:35<00:40,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1607 ||:  46%|####6     | 51/110 [00:36<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1619 ||:  47%|####7     | 52/110 [00:37<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1618 ||:  48%|####8     | 53/110 [00:37<00:40,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1619 ||:  49%|####9     | 54/110 [00:38<00:40,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1630 ||:  50%|#####     | 55/110 [00:39<00:39,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1631 ||:  51%|#####     | 56/110 [00:39<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1633 ||:  52%|#####1    | 57/110 [00:40<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1635 ||:  53%|#####2    | 58/110 [00:41<00:38,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1626 ||:  54%|#####3    | 59/110 [00:42<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1625 ||:  55%|#####4    | 60/110 [00:42<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1629 ||:  55%|#####5    | 61/110 [00:43<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1642 ||:  56%|#####6    | 62/110 [00:44<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1645 ||:  57%|#####7    | 63/110 [00:44<00:33,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1645 ||:  58%|#####8    | 64/110 [00:45<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1648 ||:  59%|#####9    | 65/110 [00:46<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1650 ||:  60%|######    | 66/110 [00:46<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1647 ||:  61%|######    | 67/110 [00:47<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1659 ||:  62%|######1   | 68/110 [00:48<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1674 ||:  63%|######2   | 69/110 [00:49<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1670 ||:  64%|######3   | 70/110 [00:49<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1669 ||:  65%|######4   | 71/110 [00:50<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1668 ||:  65%|######5   | 72/110 [00:51<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1666 ||:  66%|######6   | 73/110 [00:51<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1669 ||:  67%|######7   | 74/110 [00:52<00:26,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1670 ||:  68%|######8   | 75/110 [00:53<00:24,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1672 ||:  69%|######9   | 76/110 [00:54<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1678 ||:  70%|#######   | 77/110 [00:54<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1682 ||:  71%|#######   | 78/110 [00:55<00:21,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1680 ||:  72%|#######1  | 79/110 [00:56<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1682 ||:  73%|#######2  | 80/110 [00:56<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1686 ||:  74%|#######3  | 81/110 [00:57<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1697 ||:  75%|#######4  | 82/110 [00:58<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1695 ||:  75%|#######5  | 83/110 [00:58<00:17,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1700 ||:  76%|#######6  | 84/110 [00:59<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1701 ||:  77%|#######7  | 85/110 [01:00<00:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1698 ||:  78%|#######8  | 86/110 [01:01<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1704 ||:  79%|#######9  | 87/110 [01:01<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1707 ||:  80%|########  | 88/110 [01:02<00:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1714 ||:  81%|########  | 89/110 [01:03<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1722 ||:  82%|########1 | 90/110 [01:03<00:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1728 ||:  83%|########2 | 91/110 [01:04<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1731 ||:  84%|########3 | 92/110 [01:05<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1732 ||:  85%|########4 | 93/110 [01:05<00:10,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1733 ||:  85%|########5 | 94/110 [01:06<00:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1734 ||:  86%|########6 | 95/110 [01:07<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1741 ||:  87%|########7 | 96/110 [01:08<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1750 ||:  88%|########8 | 97/110 [01:08<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1743 ||:  89%|########9 | 98/110 [01:09<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1744 ||:  90%|######### | 99/110 [01:10<00:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1753 ||:  91%|######### | 100/110 [01:10<00:07,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  92%|#########1| 101/110 [01:11<00:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1761 ||:  93%|#########2| 102/110 [01:12<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  94%|#########3| 103/110 [01:13<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  95%|#########4| 104/110 [01:13<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  95%|#########5| 105/110 [01:14<00:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1764 ||:  96%|#########6| 106/110 [01:15<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1761 ||:  97%|#########7| 107/110 [01:15<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1768 ||:  98%|#########8| 108/110 [01:16<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1780 ||:  99%|#########9| 109/110 [01:17<00:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1777 ||: 100%|##########| 110/110 [01:17<00:00,  1.76it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1777 ||: 100%|##########| 110/110 [01:17<00:00,  1.42it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6624, acc: 0.6562, no_result: 0.0312, loss: 0.8175 ||:   4%|4         | 1/24 [00:00<00:09,  2.41it/s]
+BLEU: 0.6550, acc: 0.7031, no_result: 0.0469, loss: 0.7905 ||:   8%|8         | 2/24 [00:00<00:08,  2.45it/s]
+BLEU: 0.6440, acc: 0.6875, no_result: 0.0938, loss: 0.8937 ||:  12%|#2        | 3/24 [00:01<00:08,  2.48it/s]
+BLEU: 0.6718, acc: 0.6953, no_result: 0.1016, loss: 0.8455 ||:  17%|#6        | 4/24 [00:01<00:08,  2.40it/s]
+BLEU: 0.7008, acc: 0.7000, no_result: 0.0938, loss: 0.8278 ||:  21%|##        | 5/24 [00:02<00:08,  2.36it/s]
+BLEU: 0.6939, acc: 0.6667, no_result: 0.0938, loss: 0.8414 ||:  25%|##5       | 6/24 [00:02<00:07,  2.44it/s]
+BLEU: 0.7050, acc: 0.6830, no_result: 0.0893, loss: 0.8256 ||:  29%|##9       | 7/24 [00:02<00:06,  2.54it/s]
+BLEU: 0.6993, acc: 0.6836, no_result: 0.0898, loss: 0.8418 ||:  33%|###3      | 8/24 [00:03<00:06,  2.56it/s]
+BLEU: 0.6912, acc: 0.6910, no_result: 0.0868, loss: 0.8387 ||:  38%|###7      | 9/24 [00:03<00:05,  2.52it/s]
+BLEU: 0.6981, acc: 0.6813, no_result: 0.0844, loss: 0.8519 ||:  42%|####1     | 10/24 [00:04<00:05,  2.49it/s]
+BLEU: 0.6992, acc: 0.6648, no_result: 0.0938, loss: 0.8906 ||:  46%|####5     | 11/24 [00:04<00:05,  2.37it/s]
+BLEU: 0.6959, acc: 0.6667, no_result: 0.0859, loss: 0.8899 ||:  50%|#####     | 12/24 [00:04<00:04,  2.44it/s]
+BLEU: 0.6874, acc: 0.6587, no_result: 0.0913, loss: 0.9140 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.35it/s]
+BLEU: 0.6882, acc: 0.6496, no_result: 0.0893, loss: 0.9515 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.24it/s]
+BLEU: 0.6772, acc: 0.6312, no_result: 0.1062, loss: 1.0007 ||:  62%|######2   | 15/24 [00:06<00:04,  2.09it/s]
+BLEU: 0.6766, acc: 0.6230, no_result: 0.1113, loss: 1.0216 ||:  67%|######6   | 16/24 [00:06<00:03,  2.05it/s]
+BLEU: 0.6737, acc: 0.6176, no_result: 0.1176, loss: 1.0191 ||:  71%|#######   | 17/24 [00:07<00:03,  2.12it/s]
+BLEU: 0.6699, acc: 0.6163, no_result: 0.1233, loss: 1.0284 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.10it/s]
+BLEU: 0.6736, acc: 0.6250, no_result: 0.1184, loss: 1.0096 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.20it/s]
+BLEU: 0.6751, acc: 0.6297, no_result: 0.1156, loss: 1.0032 ||:  83%|########3 | 20/24 [00:08<00:01,  2.32it/s]
+BLEU: 0.6717, acc: 0.6250, no_result: 0.1146, loss: 1.0042 ||:  88%|########7 | 21/24 [00:09<00:01,  2.31it/s]
+BLEU: 0.6744, acc: 0.6236, no_result: 0.1108, loss: 0.9988 ||:  92%|#########1| 22/24 [00:09<00:00,  2.30it/s]
+BLEU: 0.6772, acc: 0.6277, no_result: 0.1087, loss: 0.9824 ||:  96%|#########5| 23/24 [00:09<00:00,  2.39it/s]
+BLEU: 0.6769, acc: 0.6247, no_result: 0.1088, loss: 0.9989 ||: 100%|##########| 24/24 [00:10<00:00,  2.93it/s]
+BLEU: 0.6769, acc: 0.6247, no_result: 0.1088, loss: 0.9989 ||: 100%|##########| 24/24 [00:10<00:00,  2.39it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1651 ||:   1%|          | 1/110 [00:00<01:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1413 ||:   2%|1         | 2/110 [00:01<01:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1258 ||:   3%|2         | 3/110 [00:02<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1381 ||:   4%|3         | 4/110 [00:02<01:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1444 ||:   5%|4         | 5/110 [00:03<01:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1436 ||:   5%|5         | 6/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1472 ||:   6%|6         | 7/110 [00:04<01:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1560 ||:   7%|7         | 8/110 [00:05<01:21,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1607 ||:   8%|8         | 9/110 [00:06<01:20,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1700 ||:   9%|9         | 10/110 [00:07<01:18,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1661 ||:  10%|#         | 11/110 [00:08<01:14,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1675 ||:  11%|#         | 12/110 [00:08<01:14,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1650 ||:  12%|#1        | 13/110 [00:09<01:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1631 ||:  13%|#2        | 14/110 [00:10<01:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1632 ||:  14%|#3        | 15/110 [00:11<01:08,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1668 ||:  15%|#4        | 16/110 [00:11<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1647 ||:  15%|#5        | 17/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1670 ||:  16%|#6        | 18/110 [00:13<01:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1665 ||:  17%|#7        | 19/110 [00:13<01:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1691 ||:  18%|#8        | 20/110 [00:15<01:23,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1691 ||:  19%|#9        | 21/110 [00:15<01:15,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1685 ||:  20%|##        | 22/110 [00:16<01:12,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1689 ||:  21%|##        | 23/110 [00:17<01:08,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1684 ||:  22%|##1       | 24/110 [00:18<01:04,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1711 ||:  23%|##2       | 25/110 [00:18<01:01,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1717 ||:  24%|##3       | 26/110 [00:19<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1724 ||:  25%|##4       | 27/110 [00:19<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1715 ||:  25%|##5       | 28/110 [00:20<00:55,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1733 ||:  26%|##6       | 29/110 [00:21<00:53,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1732 ||:  27%|##7       | 30/110 [00:21<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1733 ||:  28%|##8       | 31/110 [00:22<00:52,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1723 ||:  29%|##9       | 32/110 [00:23<00:52,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1719 ||:  30%|###       | 33/110 [00:23<00:52,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1731 ||:  31%|###       | 34/110 [00:24<00:51,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1739 ||:  32%|###1      | 35/110 [00:25<00:54,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1746 ||:  33%|###2      | 36/110 [00:26<00:51,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1741 ||:  34%|###3      | 37/110 [00:26<00:51,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  35%|###4      | 38/110 [00:27<00:51,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1759 ||:  35%|###5      | 39/110 [00:28<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1761 ||:  36%|###6      | 40/110 [00:28<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1751 ||:  37%|###7      | 41/110 [00:29<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1744 ||:  38%|###8      | 42/110 [00:30<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1740 ||:  39%|###9      | 43/110 [00:30<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1752 ||:  40%|####      | 44/110 [00:31<00:43,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1746 ||:  41%|####      | 45/110 [00:32<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1730 ||:  42%|####1     | 46/110 [00:32<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1730 ||:  43%|####2     | 47/110 [00:33<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1720 ||:  44%|####3     | 48/110 [00:34<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1721 ||:  45%|####4     | 49/110 [00:34<00:40,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1728 ||:  45%|####5     | 50/110 [00:35<00:42,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1742 ||:  46%|####6     | 51/110 [00:36<00:40,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1737 ||:  47%|####7     | 52/110 [00:37<00:40,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1752 ||:  48%|####8     | 53/110 [00:37<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1742 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1745 ||:  50%|#####     | 55/110 [00:39<00:39,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1744 ||:  51%|#####     | 56/110 [00:40<00:40,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1741 ||:  52%|#####1    | 57/110 [00:40<00:38,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1744 ||:  53%|#####2    | 58/110 [00:41<00:37,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1738 ||:  54%|#####3    | 59/110 [00:42<00:37,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1747 ||:  55%|#####4    | 60/110 [00:42<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1746 ||:  55%|#####5    | 61/110 [00:43<00:33,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1744 ||:  56%|#####6    | 62/110 [00:44<00:32,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1749 ||:  57%|#####7    | 63/110 [00:44<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1755 ||:  58%|#####8    | 64/110 [00:45<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1757 ||:  59%|#####9    | 65/110 [00:46<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1751 ||:  60%|######    | 66/110 [00:46<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1758 ||:  61%|######    | 67/110 [00:47<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1761 ||:  62%|######1   | 68/110 [00:48<00:30,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1765 ||:  63%|######2   | 69/110 [00:49<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1767 ||:  64%|######3   | 70/110 [00:49<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1761 ||:  65%|######4   | 71/110 [00:50<00:28,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  65%|######5   | 72/110 [00:51<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1758 ||:  66%|######6   | 73/110 [00:51<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1755 ||:  67%|######7   | 74/110 [00:52<00:25,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  68%|######8   | 75/110 [00:53<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1767 ||:  69%|######9   | 76/110 [00:53<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1772 ||:  70%|#######   | 77/110 [00:54<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1776 ||:  71%|#######   | 78/110 [00:55<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1782 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1774 ||:  73%|#######2  | 80/110 [00:56<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  74%|#######3  | 81/110 [00:57<00:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1769 ||:  75%|#######4  | 82/110 [00:58<00:21,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1770 ||:  75%|#######5  | 83/110 [00:59<00:22,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  76%|#######6  | 84/110 [01:00<00:19,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1764 ||:  77%|#######7  | 85/110 [01:00<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  78%|#######8  | 86/110 [01:01<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1767 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1764 ||:  80%|########  | 88/110 [01:02<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1767 ||:  81%|########  | 89/110 [01:03<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1768 ||:  82%|########1 | 90/110 [01:04<00:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1768 ||:  83%|########2 | 91/110 [01:04<00:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1769 ||:  84%|########3 | 92/110 [01:05<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1768 ||:  85%|########4 | 93/110 [01:06<00:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1766 ||:  85%|########5 | 94/110 [01:06<00:10,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  86%|########6 | 95/110 [01:07<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  87%|########7 | 96/110 [01:08<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1779 ||:  88%|########8 | 97/110 [01:08<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1779 ||:  89%|########9 | 98/110 [01:09<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1779 ||:  90%|######### | 99/110 [01:10<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1780 ||:  91%|######### | 100/110 [01:10<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1790 ||:  92%|#########1| 101/110 [01:11<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1790 ||:  93%|#########2| 102/110 [01:12<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1789 ||:  94%|#########3| 103/110 [01:12<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1792 ||:  95%|#########4| 104/110 [01:13<00:04,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1800 ||:  95%|#########5| 105/110 [01:14<00:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1797 ||:  96%|#########6| 106/110 [01:15<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1805 ||:  97%|#########7| 107/110 [01:15<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1804 ||:  98%|#########8| 108/110 [01:16<00:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1806 ||:  99%|#########9| 109/110 [01:17<00:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1800 ||: 100%|##########| 110/110 [01:17<00:00,  1.72it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1800 ||: 100%|##########| 110/110 [01:17<00:00,  1.42it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6620, acc: 0.7188, no_result: 0.0625, loss: 0.7854 ||:   4%|4         | 1/24 [00:00<00:09,  2.36it/s]
+BLEU: 0.6554, acc: 0.7344, no_result: 0.0469, loss: 0.7379 ||:   8%|8         | 2/24 [00:00<00:09,  2.41it/s]
+BLEU: 0.6510, acc: 0.6771, no_result: 0.0938, loss: 0.8525 ||:  12%|#2        | 3/24 [00:01<00:08,  2.38it/s]
+BLEU: 0.6801, acc: 0.6719, no_result: 0.1172, loss: 0.8276 ||:  17%|#6        | 4/24 [00:01<00:08,  2.32it/s]
+BLEU: 0.7052, acc: 0.7000, no_result: 0.1000, loss: 0.8142 ||:  21%|##        | 5/24 [00:02<00:08,  2.32it/s]
+BLEU: 0.7011, acc: 0.6667, no_result: 0.0938, loss: 0.8183 ||:  25%|##5       | 6/24 [00:02<00:07,  2.40it/s]
+BLEU: 0.7065, acc: 0.6786, no_result: 0.0938, loss: 0.8123 ||:  29%|##9       | 7/24 [00:02<00:06,  2.45it/s]
+BLEU: 0.7004, acc: 0.6758, no_result: 0.0938, loss: 0.8254 ||:  33%|###3      | 8/24 [00:03<00:06,  2.47it/s]
+BLEU: 0.6876, acc: 0.6771, no_result: 0.0972, loss: 0.8194 ||:  38%|###7      | 9/24 [00:03<00:06,  2.45it/s]
+BLEU: 0.6990, acc: 0.6625, no_result: 0.1000, loss: 0.8347 ||:  42%|####1     | 10/24 [00:04<00:05,  2.51it/s]
+BLEU: 0.6999, acc: 0.6562, no_result: 0.0994, loss: 0.8798 ||:  46%|####5     | 11/24 [00:04<00:05,  2.52it/s]
+BLEU: 0.6933, acc: 0.6562, no_result: 0.0964, loss: 0.8838 ||:  50%|#####     | 12/24 [00:04<00:04,  2.66it/s]
+BLEU: 0.6859, acc: 0.6514, no_result: 0.0986, loss: 0.9091 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.62it/s]
+BLEU: 0.6882, acc: 0.6384, no_result: 0.0960, loss: 0.9313 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.57it/s]
+BLEU: 0.6718, acc: 0.6229, no_result: 0.1146, loss: 0.9825 ||:  62%|######2   | 15/24 [00:06<00:03,  2.41it/s]
+BLEU: 0.6682, acc: 0.6211, no_result: 0.1172, loss: 1.0054 ||:  67%|######6   | 16/24 [00:06<00:03,  2.39it/s]
+BLEU: 0.6657, acc: 0.6158, no_result: 0.1268, loss: 1.0033 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6662, acc: 0.6111, no_result: 0.1337, loss: 1.0138 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.43it/s]
+BLEU: 0.6696, acc: 0.6168, no_result: 0.1299, loss: 0.9953 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.6690, acc: 0.6234, no_result: 0.1266, loss: 0.9884 ||:  83%|########3 | 20/24 [00:08<00:01,  2.67it/s]
+BLEU: 0.6646, acc: 0.6205, no_result: 0.1220, loss: 0.9902 ||:  88%|########7 | 21/24 [00:08<00:01,  2.67it/s]
+BLEU: 0.6682, acc: 0.6151, no_result: 0.1193, loss: 0.9893 ||:  92%|#########1| 22/24 [00:08<00:00,  2.62it/s]
+BLEU: 0.6744, acc: 0.6236, no_result: 0.1168, loss: 0.9732 ||:  96%|#########5| 23/24 [00:09<00:00,  2.72it/s]
+BLEU: 0.6739, acc: 0.6208, no_result: 0.1166, loss: 0.9844 ||: 100%|##########| 24/24 [00:09<00:00,  3.36it/s]
+BLEU: 0.6739, acc: 0.6208, no_result: 0.1166, loss: 0.9844 ||: 100%|##########| 24/24 [00:09<00:00,  2.59it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1231 ||:   1%|          | 1/110 [00:00<01:23,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1752 ||:   2%|1         | 2/110 [00:01<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1680 ||:   3%|2         | 3/110 [00:02<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1720 ||:   4%|3         | 4/110 [00:02<01:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1688 ||:   5%|4         | 5/110 [00:03<01:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1757 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1707 ||:   6%|6         | 7/110 [00:04<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1714 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1734 ||:   9%|9         | 10/110 [00:07<01:37,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1722 ||:  10%|#         | 11/110 [00:08<01:28,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1749 ||:  11%|#         | 12/110 [00:09<01:23,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1731 ||:  12%|#1        | 13/110 [00:09<01:17,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1704 ||:  13%|#2        | 14/110 [00:10<01:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1703 ||:  14%|#3        | 15/110 [00:11<01:09,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1702 ||:  15%|#4        | 16/110 [00:11<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1699 ||:  15%|#5        | 17/110 [00:12<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1727 ||:  16%|#6        | 18/110 [00:13<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  17%|#7        | 19/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1766 ||:  18%|#8        | 20/110 [00:14<01:07,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1764 ||:  19%|#9        | 21/110 [00:15<01:05,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  20%|##        | 22/110 [00:16<01:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1735 ||:  21%|##        | 23/110 [00:17<01:07,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1747 ||:  22%|##1       | 24/110 [00:17<01:04,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1746 ||:  23%|##2       | 25/110 [00:18<01:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1751 ||:  24%|##3       | 26/110 [00:19<01:00,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1739 ||:  25%|##4       | 27/110 [00:19<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1739 ||:  25%|##5       | 28/110 [00:20<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1749 ||:  26%|##6       | 29/110 [00:21<00:58,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1735 ||:  27%|##7       | 30/110 [00:22<00:57,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1722 ||:  28%|##8       | 31/110 [00:22<00:59,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1737 ||:  29%|##9       | 32/110 [00:23<00:56,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1743 ||:  30%|###       | 33/110 [00:24<00:57,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1738 ||:  31%|###       | 34/110 [00:24<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1721 ||:  32%|###1      | 35/110 [00:25<00:56,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1731 ||:  33%|###2      | 36/110 [00:26<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1728 ||:  34%|###3      | 37/110 [00:27<00:51,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1718 ||:  35%|###4      | 38/110 [00:27<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1725 ||:  35%|###5      | 39/110 [00:28<00:51,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1716 ||:  36%|###6      | 40/110 [00:29<00:49,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1723 ||:  37%|###7      | 41/110 [00:29<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1726 ||:  38%|###8      | 42/110 [00:30<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1730 ||:  39%|###9      | 43/110 [00:31<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1732 ||:  40%|####      | 44/110 [00:31<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1745 ||:  41%|####      | 45/110 [00:32<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1753 ||:  42%|####1     | 46/110 [00:33<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1745 ||:  43%|####2     | 47/110 [00:33<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1740 ||:  44%|####3     | 48/110 [00:34<00:44,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1745 ||:  45%|####4     | 49/110 [00:35<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1755 ||:  45%|####5     | 50/110 [00:36<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  46%|####6     | 51/110 [00:36<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  47%|####7     | 52/110 [00:37<00:38,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  48%|####8     | 53/110 [00:38<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  49%|####9     | 54/110 [00:38<00:37,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1759 ||:  50%|#####     | 55/110 [00:39<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1777 ||:  51%|#####     | 56/110 [00:40<00:37,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1781 ||:  52%|#####1    | 57/110 [00:40<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1792 ||:  53%|#####2    | 58/110 [00:41<00:35,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1783 ||:  54%|#####3    | 59/110 [00:42<00:35,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1800 ||:  55%|#####4    | 60/110 [00:42<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1808 ||:  55%|#####5    | 61/110 [00:43<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1805 ||:  56%|#####6    | 62/110 [00:44<00:33,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1811 ||:  57%|#####7    | 63/110 [00:44<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1807 ||:  58%|#####8    | 64/110 [00:45<00:33,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1805 ||:  59%|#####9    | 65/110 [00:46<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1801 ||:  60%|######    | 66/110 [00:47<00:31,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1792 ||:  61%|######    | 67/110 [00:47<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1787 ||:  62%|######1   | 68/110 [00:48<00:31,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1781 ||:  63%|######2   | 69/110 [00:49<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1781 ||:  64%|######3   | 70/110 [00:50<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1779 ||:  65%|######4   | 71/110 [00:50<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1782 ||:  65%|######5   | 72/110 [00:51<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1789 ||:  66%|######6   | 73/110 [00:52<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1779 ||:  67%|######7   | 74/110 [00:52<00:24,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1776 ||:  68%|######8   | 75/110 [00:53<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1775 ||:  69%|######9   | 76/110 [00:54<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1774 ||:  70%|#######   | 77/110 [00:54<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  71%|#######   | 78/110 [00:55<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1769 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1768 ||:  73%|#######2  | 80/110 [00:56<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1765 ||:  74%|#######3  | 81/110 [00:57<00:20,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1765 ||:  75%|#######4  | 82/110 [00:58<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1768 ||:  75%|#######5  | 83/110 [00:58<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1767 ||:  76%|#######6  | 84/110 [00:59<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  77%|#######7  | 85/110 [01:00<00:16,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1765 ||:  78%|#######8  | 86/110 [01:00<00:15,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  79%|#######9  | 87/110 [01:01<00:14,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1765 ||:  80%|########  | 88/110 [01:02<00:14,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  81%|########  | 89/110 [01:02<00:13,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1772 ||:  82%|########1 | 90/110 [01:03<00:13,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1777 ||:  83%|########2 | 91/110 [01:03<00:12,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1771 ||:  84%|########3 | 92/110 [01:04<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1763 ||:  85%|########4 | 93/110 [01:05<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1764 ||:  85%|########5 | 94/110 [01:06<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1760 ||:  86%|########6 | 95/110 [01:06<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  87%|########7 | 96/110 [01:07<00:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1762 ||:  88%|########8 | 97/110 [01:08<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1755 ||:  89%|########9 | 98/110 [01:08<00:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1751 ||:  90%|######### | 99/110 [01:09<00:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1746 ||:  91%|######### | 100/110 [01:10<00:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1737 ||:  92%|#########1| 101/110 [01:10<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1735 ||:  93%|#########2| 102/110 [01:11<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1734 ||:  94%|#########3| 103/110 [01:12<00:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1734 ||:  95%|#########4| 104/110 [01:12<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1735 ||:  95%|#########5| 105/110 [01:13<00:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1738 ||:  96%|#########6| 106/110 [01:14<00:03,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1736 ||:  97%|#########7| 107/110 [01:15<00:02,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1738 ||:  98%|#########8| 108/110 [01:16<00:01,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1736 ||:  99%|#########9| 109/110 [01:16<00:00,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1743 ||: 100%|##########| 110/110 [01:17<00:00,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1743 ||: 100%|##########| 110/110 [01:17<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6820, acc: 0.6875, no_result: 0.0312, loss: 0.7952 ||:   4%|4         | 1/24 [00:00<00:08,  2.84it/s]
+BLEU: 0.6842, acc: 0.6875, no_result: 0.0625, loss: 0.7582 ||:   8%|8         | 2/24 [00:00<00:07,  2.86it/s]
+BLEU: 0.6802, acc: 0.6771, no_result: 0.0938, loss: 0.8696 ||:  12%|#2        | 3/24 [00:01<00:07,  2.86it/s]
+BLEU: 0.7037, acc: 0.6641, no_result: 0.1172, loss: 0.8626 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7235, acc: 0.6750, no_result: 0.1250, loss: 0.8540 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7269, acc: 0.6458, no_result: 0.1146, loss: 0.8595 ||:  25%|##5       | 6/24 [00:02<00:06,  2.82it/s]
+BLEU: 0.7253, acc: 0.6562, no_result: 0.1116, loss: 0.8341 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7202, acc: 0.6602, no_result: 0.1094, loss: 0.8518 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7112, acc: 0.6528, no_result: 0.1042, loss: 0.8448 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7148, acc: 0.6500, no_result: 0.1031, loss: 0.8649 ||:  42%|####1     | 10/24 [00:03<00:04,  2.86it/s]
+BLEU: 0.7121, acc: 0.6335, no_result: 0.1051, loss: 0.9048 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7064, acc: 0.6354, no_result: 0.1016, loss: 0.9008 ||:  50%|#####     | 12/24 [00:04<00:04,  2.84it/s]
+BLEU: 0.6943, acc: 0.6322, no_result: 0.1034, loss: 0.9185 ||:  54%|#####4    | 13/24 [00:04<00:03,  2.75it/s]
+BLEU: 0.6942, acc: 0.6228, no_result: 0.1027, loss: 0.9541 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.66it/s]
+BLEU: 0.6826, acc: 0.6021, no_result: 0.1250, loss: 1.0119 ||:  62%|######2   | 15/24 [00:05<00:03,  2.45it/s]
+BLEU: 0.6806, acc: 0.5977, no_result: 0.1309, loss: 1.0334 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.6787, acc: 0.5919, no_result: 0.1379, loss: 1.0346 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6803, acc: 0.5885, no_result: 0.1406, loss: 1.0442 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6831, acc: 0.5970, no_result: 0.1349, loss: 1.0252 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.6838, acc: 0.6000, no_result: 0.1313, loss: 1.0209 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.6793, acc: 0.5967, no_result: 0.1280, loss: 1.0183 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.6823, acc: 0.5909, no_result: 0.1236, loss: 1.0171 ||:  92%|#########1| 22/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.6874, acc: 0.5978, no_result: 0.1209, loss: 0.9975 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.6865, acc: 0.5914, no_result: 0.1205, loss: 1.0133 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.6865, acc: 0.5914, no_result: 0.1205, loss: 1.0133 ||: 100%|##########| 24/24 [00:08<00:00,  2.77it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1936 ||:   1%|          | 1/110 [00:00<01:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2243 ||:   2%|1         | 2/110 [00:01<01:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2098 ||:   3%|2         | 3/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2024 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1948 ||:   5%|4         | 5/110 [00:03<01:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1949 ||:   5%|5         | 6/110 [00:04<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1819 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1913 ||:   7%|7         | 8/110 [00:05<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1882 ||:   8%|8         | 9/110 [00:06<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1881 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1879 ||:  10%|#         | 11/110 [00:07<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1852 ||:  11%|#         | 12/110 [00:08<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1826 ||:  12%|#1        | 13/110 [00:08<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1825 ||:  13%|#2        | 14/110 [00:09<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1798 ||:  14%|#3        | 15/110 [00:10<01:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1792 ||:  15%|#4        | 16/110 [00:10<00:59,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1841 ||:  15%|#5        | 17/110 [00:11<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1803 ||:  16%|#6        | 18/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1802 ||:  17%|#7        | 19/110 [00:12<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1822 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1830 ||:  19%|#9        | 21/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1853 ||:  20%|##        | 22/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1842 ||:  21%|##        | 23/110 [00:15<00:57,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1839 ||:  22%|##1       | 24/110 [00:16<00:57,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1826 ||:  23%|##2       | 25/110 [00:16<00:55,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1817 ||:  24%|##3       | 26/110 [00:17<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1843 ||:  25%|##4       | 27/110 [00:18<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1823 ||:  25%|##5       | 28/110 [00:18<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1814 ||:  26%|##6       | 29/110 [00:19<00:53,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1809 ||:  27%|##7       | 30/110 [00:20<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1800 ||:  28%|##8       | 31/110 [00:20<00:52,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1794 ||:  29%|##9       | 32/110 [00:21<00:53,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1791 ||:  30%|###       | 33/110 [00:22<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1785 ||:  31%|###       | 34/110 [00:23<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1769 ||:  32%|###1      | 35/110 [00:23<00:51,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1754 ||:  33%|###2      | 36/110 [00:24<00:49,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1757 ||:  34%|###3      | 37/110 [00:25<00:54,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1755 ||:  35%|###4      | 38/110 [00:25<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1744 ||:  35%|###5      | 39/110 [00:26<00:47,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1745 ||:  36%|###6      | 40/110 [00:27<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1733 ||:  37%|###7      | 41/110 [00:27<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1732 ||:  38%|###8      | 42/110 [00:28<00:43,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1740 ||:  39%|###9      | 43/110 [00:28<00:41,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1752 ||:  40%|####      | 44/110 [00:29<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1751 ||:  41%|####      | 45/110 [00:30<00:44,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1737 ||:  42%|####1     | 46/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1729 ||:  43%|####2     | 47/110 [00:31<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1728 ||:  44%|####3     | 48/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1731 ||:  45%|####4     | 49/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1733 ||:  45%|####5     | 50/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1735 ||:  46%|####6     | 51/110 [00:34<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1736 ||:  47%|####7     | 52/110 [00:34<00:36,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1730 ||:  48%|####8     | 53/110 [00:35<00:35,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1717 ||:  49%|####9     | 54/110 [00:36<00:34,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1711 ||:  50%|#####     | 55/110 [00:36<00:34,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1703 ||:  51%|#####     | 56/110 [00:37<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1695 ||:  52%|#####1    | 57/110 [00:38<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1704 ||:  53%|#####2    | 58/110 [00:38<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1706 ||:  54%|#####3    | 59/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1709 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1704 ||:  55%|#####5    | 61/110 [00:40<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1700 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1704 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1709 ||:  58%|#####8    | 64/110 [00:42<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1703 ||:  59%|#####9    | 65/110 [00:43<00:29,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1693 ||:  60%|######    | 66/110 [00:44<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1688 ||:  61%|######    | 67/110 [00:44<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1687 ||:  62%|######1   | 68/110 [00:45<00:29,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1680 ||:  63%|######2   | 69/110 [00:46<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:  64%|######3   | 70/110 [00:47<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1682 ||:  65%|######4   | 71/110 [00:47<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1684 ||:  65%|######5   | 72/110 [00:48<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1680 ||:  66%|######6   | 73/110 [00:48<00:23,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1678 ||:  67%|######7   | 74/110 [00:49<00:23,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1678 ||:  68%|######8   | 75/110 [00:50<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:  69%|######9   | 76/110 [00:51<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:  70%|#######   | 77/110 [00:51<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1696 ||:  71%|#######   | 78/110 [00:52<00:24,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1699 ||:  72%|#######1  | 79/110 [00:53<00:22,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1695 ||:  73%|#######2  | 80/110 [00:54<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1694 ||:  74%|#######3  | 81/110 [00:54<00:20,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1685 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1685 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1684 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1682 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1679 ||:  80%|########  | 88/110 [00:59<00:14,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:  81%|########  | 89/110 [01:00<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1679 ||:  82%|########1 | 90/110 [01:00<00:13,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1685 ||:  83%|########2 | 91/110 [01:01<00:12,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1686 ||:  84%|########3 | 92/110 [01:01<00:11,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1687 ||:  85%|########4 | 93/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1696 ||:  85%|########5 | 94/110 [01:03<00:10,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1700 ||:  86%|########6 | 95/110 [01:03<00:09,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1692 ||:  87%|########7 | 96/110 [01:04<00:08,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1694 ||:  88%|########8 | 97/110 [01:05<00:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1694 ||:  89%|########9 | 98/110 [01:05<00:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1693 ||:  90%|######### | 99/110 [01:06<00:07,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1691 ||:  91%|######### | 100/110 [01:07<00:09,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1694 ||:  92%|#########1| 101/110 [01:08<00:07,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1700 ||:  93%|#########2| 102/110 [01:09<00:06,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1707 ||:  94%|#########3| 103/110 [01:09<00:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1707 ||:  95%|#########4| 104/110 [01:10<00:04,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1707 ||:  95%|#########5| 105/110 [01:11<00:03,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1715 ||:  96%|#########6| 106/110 [01:12<00:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1718 ||:  97%|#########7| 107/110 [01:12<00:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1721 ||:  98%|#########8| 108/110 [01:13<00:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1718 ||:  99%|#########9| 109/110 [01:14<00:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1711 ||: 100%|##########| 110/110 [01:14<00:00,  1.72it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1711 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6790, acc: 0.5625, no_result: 0.1250, loss: 0.8125 ||:   4%|4         | 1/24 [00:00<00:08,  2.66it/s]
+BLEU: 0.6272, acc: 0.5938, no_result: 0.1094, loss: 0.8110 ||:   8%|8         | 2/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.6541, acc: 0.6146, no_result: 0.1250, loss: 0.9314 ||:  12%|#2        | 3/24 [00:01<00:07,  2.76it/s]
+BLEU: 0.6807, acc: 0.6172, no_result: 0.1172, loss: 0.9041 ||:  17%|#6        | 4/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7108, acc: 0.6625, no_result: 0.1000, loss: 0.8625 ||:  21%|##        | 5/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7055, acc: 0.6458, no_result: 0.0990, loss: 0.8838 ||:  25%|##5       | 6/24 [00:02<00:06,  2.69it/s]
+BLEU: 0.7137, acc: 0.6652, no_result: 0.1027, loss: 0.8699 ||:  29%|##9       | 7/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7124, acc: 0.6719, no_result: 0.1016, loss: 0.8844 ||:  33%|###3      | 8/24 [00:02<00:05,  2.88it/s]
+BLEU: 0.7005, acc: 0.6771, no_result: 0.0972, loss: 0.8767 ||:  38%|###7      | 9/24 [00:03<00:05,  2.88it/s]
+BLEU: 0.7084, acc: 0.6656, no_result: 0.0938, loss: 0.8908 ||:  42%|####1     | 10/24 [00:03<00:04,  2.82it/s]
+BLEU: 0.7064, acc: 0.6420, no_result: 0.0966, loss: 0.9356 ||:  46%|####5     | 11/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7006, acc: 0.6484, no_result: 0.0938, loss: 0.9430 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.6916, acc: 0.6394, no_result: 0.0986, loss: 0.9709 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.69it/s]
+BLEU: 0.6926, acc: 0.6295, no_result: 0.1004, loss: 1.0040 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.59it/s]
+BLEU: 0.6781, acc: 0.6125, no_result: 0.1229, loss: 1.0587 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.6707, acc: 0.6035, no_result: 0.1289, loss: 1.0820 ||:  67%|######6   | 16/24 [00:06<00:03,  2.37it/s]
+BLEU: 0.6727, acc: 0.5974, no_result: 0.1342, loss: 1.0755 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6736, acc: 0.5972, no_result: 0.1372, loss: 1.0801 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6777, acc: 0.6069, no_result: 0.1332, loss: 1.0571 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.6805, acc: 0.6141, no_result: 0.1297, loss: 1.0481 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6754, acc: 0.6101, no_result: 0.1280, loss: 1.0499 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6798, acc: 0.6009, no_result: 0.1264, loss: 1.0494 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.6829, acc: 0.6101, no_result: 0.1209, loss: 1.0302 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.6822, acc: 0.6170, no_result: 0.1159, loss: 1.0322 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.6822, acc: 0.6170, no_result: 0.1159, loss: 1.0322 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1236 ||:   1%|          | 1/110 [00:00<01:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1462 ||:   2%|1         | 2/110 [00:01<01:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1483 ||:   3%|2         | 3/110 [00:01<01:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1544 ||:   4%|3         | 4/110 [00:02<01:07,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1550 ||:   5%|4         | 5/110 [00:03<01:06,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1551 ||:   5%|5         | 6/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1516 ||:   6%|6         | 7/110 [00:04<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1486 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1508 ||:   8%|8         | 9/110 [00:05<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1508 ||:   9%|9         | 10/110 [00:06<01:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1448 ||:  10%|#         | 11/110 [00:07<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1397 ||:  11%|#         | 12/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1373 ||:  12%|#1        | 13/110 [00:08<01:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1357 ||:  13%|#2        | 14/110 [00:09<01:01,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1367 ||:  14%|#3        | 15/110 [00:09<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1373 ||:  15%|#4        | 16/110 [00:10<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1392 ||:  15%|#5        | 17/110 [00:11<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1377 ||:  16%|#6        | 18/110 [00:11<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1365 ||:  17%|#7        | 19/110 [00:12<00:59,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1413 ||:  18%|#8        | 20/110 [00:13<01:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1427 ||:  19%|#9        | 21/110 [00:14<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1443 ||:  20%|##        | 22/110 [00:14<00:59,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1447 ||:  21%|##        | 23/110 [00:15<00:57,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1462 ||:  22%|##1       | 24/110 [00:15<00:55,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1454 ||:  23%|##2       | 25/110 [00:16<00:55,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1475 ||:  24%|##3       | 26/110 [00:17<00:55,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1479 ||:  25%|##4       | 27/110 [00:17<00:54,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1476 ||:  25%|##5       | 28/110 [00:18<00:54,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1475 ||:  26%|##6       | 29/110 [00:19<00:51,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1474 ||:  27%|##7       | 30/110 [00:19<00:49,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1487 ||:  28%|##8       | 31/110 [00:20<00:53,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1528 ||:  29%|##9       | 32/110 [00:21<00:52,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1528 ||:  30%|###       | 33/110 [00:21<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1519 ||:  31%|###       | 34/110 [00:22<00:51,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1514 ||:  32%|###1      | 35/110 [00:23<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1510 ||:  33%|###2      | 36/110 [00:24<00:52,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1500 ||:  34%|###3      | 37/110 [00:24<00:54,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  35%|###4      | 38/110 [00:25<00:50,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  35%|###5      | 39/110 [00:26<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  36%|###6      | 40/110 [00:27<00:50,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  37%|###7      | 41/110 [00:27<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  38%|###8      | 42/110 [00:28<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1517 ||:  39%|###9      | 43/110 [00:28<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1512 ||:  40%|####      | 44/110 [00:29<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1509 ||:  41%|####      | 45/110 [00:30<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  42%|####1     | 46/110 [00:30<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  43%|####2     | 47/110 [00:31<00:42,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  44%|####3     | 48/110 [00:32<00:39,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  45%|####4     | 49/110 [00:32<00:38,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  45%|####5     | 50/110 [00:33<00:37,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1514 ||:  46%|####6     | 51/110 [00:33<00:36,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1515 ||:  47%|####7     | 52/110 [00:34<00:35,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  48%|####8     | 53/110 [00:35<00:35,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1518 ||:  49%|####9     | 54/110 [00:35<00:34,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1519 ||:  50%|#####     | 55/110 [00:36<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1523 ||:  51%|#####     | 56/110 [00:37<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  52%|#####1    | 57/110 [00:38<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1528 ||:  53%|#####2    | 58/110 [00:38<00:36,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1526 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1518 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1513 ||:  55%|#####5    | 61/110 [00:40<00:33,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1514 ||:  56%|#####6    | 62/110 [00:41<00:33,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  58%|#####8    | 64/110 [00:42<00:29,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1526 ||:  59%|#####9    | 65/110 [00:43<00:30,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  60%|######    | 66/110 [00:44<00:29,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1532 ||:  61%|######    | 67/110 [00:44<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1534 ||:  62%|######1   | 68/110 [00:45<00:27,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1533 ||:  63%|######2   | 69/110 [00:46<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1532 ||:  64%|######3   | 70/110 [00:46<00:26,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  65%|######4   | 71/110 [00:47<00:25,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1531 ||:  65%|######5   | 72/110 [00:48<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1535 ||:  66%|######6   | 73/110 [00:48<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1540 ||:  67%|######7   | 74/110 [00:49<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1539 ||:  68%|######8   | 75/110 [00:50<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1546 ||:  69%|######9   | 76/110 [00:50<00:22,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1545 ||:  70%|#######   | 77/110 [00:51<00:22,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1555 ||:  71%|#######   | 78/110 [00:52<00:22,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1557 ||:  72%|#######1  | 79/110 [00:53<00:22,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1563 ||:  73%|#######2  | 80/110 [00:53<00:22,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1567 ||:  74%|#######3  | 81/110 [00:54<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1563 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1562 ||:  75%|#######5  | 83/110 [00:55<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1566 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1569 ||:  77%|#######7  | 85/110 [00:56<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1565 ||:  78%|#######8  | 86/110 [00:57<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1560 ||:  79%|#######9  | 87/110 [00:58<00:14,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1569 ||:  80%|########  | 88/110 [00:58<00:13,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1566 ||:  81%|########  | 89/110 [00:59<00:13,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1567 ||:  82%|########1 | 90/110 [01:00<00:17,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1562 ||:  83%|########2 | 91/110 [01:01<00:15,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1571 ||:  84%|########3 | 92/110 [01:02<00:13,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1573 ||:  85%|########4 | 93/110 [01:02<00:12,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1574 ||:  85%|########5 | 94/110 [01:03<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1574 ||:  86%|########6 | 95/110 [01:04<00:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1577 ||:  87%|########7 | 96/110 [01:04<00:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1580 ||:  88%|########8 | 97/110 [01:05<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1585 ||:  89%|########9 | 98/110 [01:06<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1588 ||:  90%|######### | 99/110 [01:07<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1592 ||:  91%|######### | 100/110 [01:07<00:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1593 ||:  92%|#########1| 101/110 [01:08<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1592 ||:  93%|#########2| 102/110 [01:09<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1592 ||:  94%|#########3| 103/110 [01:10<00:05,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1592 ||:  95%|#########4| 104/110 [01:10<00:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1590 ||:  95%|#########5| 105/110 [01:11<00:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1590 ||:  96%|#########6| 106/110 [01:12<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1591 ||:  97%|#########7| 107/110 [01:12<00:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1587 ||:  98%|#########8| 108/110 [01:13<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1588 ||:  99%|#########9| 109/110 [01:14<00:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1595 ||: 100%|##########| 110/110 [01:14<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1595 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6992, acc: 0.7188, no_result: 0.0625, loss: 0.8807 ||:   4%|4         | 1/24 [00:00<00:08,  2.83it/s]
+BLEU: 0.6641, acc: 0.7344, no_result: 0.0625, loss: 0.8143 ||:   8%|8         | 2/24 [00:00<00:07,  2.87it/s]
+BLEU: 0.6344, acc: 0.6771, no_result: 0.1146, loss: 0.9011 ||:  12%|#2        | 3/24 [00:01<00:07,  2.87it/s]
+BLEU: 0.6586, acc: 0.6641, no_result: 0.1406, loss: 0.8636 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.6908, acc: 0.6687, no_result: 0.1375, loss: 0.8689 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.6808, acc: 0.6302, no_result: 0.1302, loss: 0.8719 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.6892, acc: 0.6473, no_result: 0.1161, loss: 0.8542 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.6917, acc: 0.6680, no_result: 0.1094, loss: 0.8704 ||:  33%|###3      | 8/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.6831, acc: 0.6632, no_result: 0.1076, loss: 0.8670 ||:  38%|###7      | 9/24 [00:03<00:05,  2.94it/s]
+BLEU: 0.6871, acc: 0.6562, no_result: 0.1062, loss: 0.8805 ||:  42%|####1     | 10/24 [00:03<00:04,  2.88it/s]
+BLEU: 0.6889, acc: 0.6449, no_result: 0.1136, loss: 0.9170 ||:  46%|####5     | 11/24 [00:03<00:04,  2.74it/s]
+BLEU: 0.6835, acc: 0.6536, no_result: 0.1094, loss: 0.9158 ||:  50%|#####     | 12/24 [00:04<00:04,  2.83it/s]
+BLEU: 0.6758, acc: 0.6466, no_result: 0.1106, loss: 0.9344 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.6797, acc: 0.6362, no_result: 0.1094, loss: 0.9743 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.64it/s]
+BLEU: 0.6659, acc: 0.6188, no_result: 0.1250, loss: 1.0255 ||:  62%|######2   | 15/24 [00:05<00:03,  2.48it/s]
+BLEU: 0.6572, acc: 0.6074, no_result: 0.1270, loss: 1.0492 ||:  67%|######6   | 16/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.6548, acc: 0.6011, no_result: 0.1360, loss: 1.0476 ||:  71%|#######   | 17/24 [00:06<00:02,  2.49it/s]
+BLEU: 0.6560, acc: 0.6024, no_result: 0.1372, loss: 1.0520 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.50it/s]
+BLEU: 0.6600, acc: 0.6069, no_result: 0.1349, loss: 1.0309 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.59it/s]
+BLEU: 0.6638, acc: 0.6141, no_result: 0.1313, loss: 1.0206 ||:  83%|########3 | 20/24 [00:07<00:01,  2.72it/s]
+BLEU: 0.6613, acc: 0.6086, no_result: 0.1310, loss: 1.0235 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6637, acc: 0.6009, no_result: 0.1307, loss: 1.0172 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.6682, acc: 0.6101, no_result: 0.1277, loss: 1.0006 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.6685, acc: 0.6078, no_result: 0.1224, loss: 1.0189 ||: 100%|##########| 24/24 [00:08<00:00,  3.41it/s]
+BLEU: 0.6685, acc: 0.6078, no_result: 0.1224, loss: 1.0189 ||: 100%|##########| 24/24 [00:08<00:00,  2.79it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2545 ||:   1%|          | 1/110 [00:00<01:08,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.2268 ||:   2%|1         | 2/110 [00:01<01:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1901 ||:   3%|2         | 3/110 [00:02<01:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1699 ||:   4%|3         | 4/110 [00:02<01:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1675 ||:   5%|4         | 5/110 [00:03<01:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1567 ||:   5%|5         | 6/110 [00:03<01:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1569 ||:   6%|6         | 7/110 [00:04<01:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1587 ||:   7%|7         | 8/110 [00:05<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1670 ||:   8%|8         | 9/110 [00:06<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1688 ||:   9%|9         | 10/110 [00:06<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1654 ||:  10%|#         | 11/110 [00:07<01:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1683 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1636 ||:  12%|#1        | 13/110 [00:08<01:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1634 ||:  13%|#2        | 14/110 [00:09<01:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1604 ||:  14%|#3        | 15/110 [00:10<01:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1595 ||:  15%|#4        | 16/110 [00:10<01:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1613 ||:  15%|#5        | 17/110 [00:11<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1655 ||:  16%|#6        | 18/110 [00:12<01:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1656 ||:  17%|#7        | 19/110 [00:13<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1633 ||:  18%|#8        | 20/110 [00:13<01:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1615 ||:  19%|#9        | 21/110 [00:14<01:04,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1623 ||:  20%|##        | 22/110 [00:15<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1612 ||:  21%|##        | 23/110 [00:15<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1624 ||:  22%|##1       | 24/110 [00:16<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1601 ||:  23%|##2       | 25/110 [00:17<00:58,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1604 ||:  24%|##3       | 26/110 [00:18<01:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1581 ||:  25%|##4       | 27/110 [00:18<00:58,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1567 ||:  25%|##5       | 28/110 [00:19<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1552 ||:  26%|##6       | 29/110 [00:20<00:55,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1557 ||:  27%|##7       | 30/110 [00:20<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1546 ||:  28%|##8       | 31/110 [00:21<00:58,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1545 ||:  29%|##9       | 32/110 [00:22<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1561 ||:  30%|###       | 33/110 [00:22<00:51,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1556 ||:  31%|###       | 34/110 [00:23<00:49,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1546 ||:  32%|###1      | 35/110 [00:23<00:46,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1534 ||:  33%|###2      | 36/110 [00:24<00:45,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  34%|###3      | 37/110 [00:25<00:43,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1539 ||:  35%|###4      | 38/110 [00:25<00:44,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1538 ||:  35%|###5      | 39/110 [00:26<00:44,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1549 ||:  36%|###6      | 40/110 [00:27<00:42,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1548 ||:  37%|###7      | 41/110 [00:27<00:44,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1542 ||:  38%|###8      | 42/110 [00:28<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1538 ||:  39%|###9      | 43/110 [00:29<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1546 ||:  40%|####      | 44/110 [00:29<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1537 ||:  41%|####      | 45/110 [00:30<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  42%|####1     | 46/110 [00:31<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1518 ||:  43%|####2     | 47/110 [00:31<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1506 ||:  44%|####3     | 48/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1496 ||:  45%|####4     | 49/110 [00:33<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1492 ||:  45%|####5     | 50/110 [00:33<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1498 ||:  46%|####6     | 51/110 [00:34<00:37,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1488 ||:  47%|####7     | 52/110 [00:35<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1490 ||:  48%|####8     | 53/110 [00:35<00:37,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1502 ||:  49%|####9     | 54/110 [00:36<00:36,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1510 ||:  50%|#####     | 55/110 [00:37<00:36,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1514 ||:  51%|#####     | 56/110 [00:37<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1516 ||:  52%|#####1    | 57/110 [00:38<00:33,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1517 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1516 ||:  54%|#####3    | 59/110 [00:39<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1516 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1531 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  59%|#####9    | 65/110 [00:43<00:29,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1526 ||:  60%|######    | 66/110 [00:44<00:28,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  61%|######    | 67/110 [00:45<00:27,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  62%|######1   | 68/110 [00:45<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  63%|######2   | 69/110 [00:46<00:27,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1531 ||:  64%|######3   | 70/110 [00:47<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1528 ||:  65%|######4   | 71/110 [00:47<00:25,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||:  65%|######5   | 72/110 [00:48<00:25,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1532 ||:  66%|######6   | 73/110 [00:49<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1541 ||:  67%|######7   | 74/110 [00:49<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1533 ||:  68%|######8   | 75/110 [00:50<00:22,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1533 ||:  69%|######9   | 76/110 [00:50<00:21,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  70%|#######   | 77/110 [00:51<00:23,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  71%|#######   | 78/110 [00:52<00:23,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1526 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1533 ||:  73%|#######2  | 80/110 [00:54<00:27,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1535 ||:  74%|#######3  | 81/110 [00:55<00:24,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  75%|#######4  | 82/110 [00:56<00:22,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  75%|#######5  | 83/110 [00:56<00:20,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1527 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1516 ||:  80%|########  | 88/110 [00:59<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  81%|########  | 89/110 [01:00<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1515 ||:  82%|########1 | 90/110 [01:01<00:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1523 ||:  83%|########2 | 91/110 [01:01<00:12,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  84%|########3 | 92/110 [01:02<00:11,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1523 ||:  85%|########4 | 93/110 [01:03<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1526 ||:  85%|########5 | 94/110 [01:03<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1529 ||:  86%|########6 | 95/110 [01:04<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1530 ||:  87%|########7 | 96/110 [01:05<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1528 ||:  88%|########8 | 97/110 [01:05<00:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1532 ||:  89%|########9 | 98/110 [01:06<00:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1533 ||:  90%|######### | 99/110 [01:07<00:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1530 ||:  91%|######### | 100/110 [01:07<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1530 ||:  92%|#########1| 101/110 [01:08<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1525 ||:  93%|#########2| 102/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1524 ||:  94%|#########3| 103/110 [01:09<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||:  95%|#########4| 104/110 [01:10<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1518 ||:  95%|#########5| 105/110 [01:11<00:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1519 ||:  96%|#########6| 106/110 [01:11<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1520 ||:  97%|#########7| 107/110 [01:12<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||:  98%|#########8| 108/110 [01:13<00:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1526 ||:  99%|#########9| 109/110 [01:14<00:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||: 100%|##########| 110/110 [01:14<00:00,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1522 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7069, acc: 0.6562, no_result: 0.0625, loss: 0.8922 ||:   4%|4         | 1/24 [00:00<00:09,  2.37it/s]
+BLEU: 0.6464, acc: 0.7031, no_result: 0.0625, loss: 0.8140 ||:   8%|8         | 2/24 [00:00<00:08,  2.45it/s]
+BLEU: 0.6452, acc: 0.6667, no_result: 0.0833, loss: 0.9113 ||:  12%|#2        | 3/24 [00:01<00:08,  2.45it/s]
+BLEU: 0.6812, acc: 0.6719, no_result: 0.0938, loss: 0.8694 ||:  17%|#6        | 4/24 [00:01<00:08,  2.39it/s]
+BLEU: 0.7088, acc: 0.6875, no_result: 0.1000, loss: 0.8645 ||:  21%|##        | 5/24 [00:02<00:08,  2.34it/s]
+BLEU: 0.6955, acc: 0.6562, no_result: 0.0885, loss: 0.8740 ||:  25%|##5       | 6/24 [00:02<00:07,  2.43it/s]
+BLEU: 0.7039, acc: 0.6741, no_result: 0.0893, loss: 0.8574 ||:  29%|##9       | 7/24 [00:02<00:06,  2.50it/s]
+BLEU: 0.7001, acc: 0.6875, no_result: 0.0898, loss: 0.8809 ||:  33%|###3      | 8/24 [00:03<00:06,  2.53it/s]
+BLEU: 0.6953, acc: 0.6875, no_result: 0.0868, loss: 0.8714 ||:  38%|###7      | 9/24 [00:03<00:05,  2.50it/s]
+BLEU: 0.7021, acc: 0.6844, no_result: 0.0875, loss: 0.8933 ||:  42%|####1     | 10/24 [00:04<00:05,  2.47it/s]
+BLEU: 0.7028, acc: 0.6676, no_result: 0.0938, loss: 0.9300 ||:  46%|####5     | 11/24 [00:04<00:05,  2.35it/s]
+BLEU: 0.7003, acc: 0.6693, no_result: 0.0911, loss: 0.9302 ||:  50%|#####     | 12/24 [00:04<00:05,  2.39it/s]
+BLEU: 0.6960, acc: 0.6731, no_result: 0.0938, loss: 0.9527 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.32it/s]
+BLEU: 0.6948, acc: 0.6585, no_result: 0.0982, loss: 0.9884 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.24it/s]
+BLEU: 0.6845, acc: 0.6438, no_result: 0.1125, loss: 1.0425 ||:  62%|######2   | 15/24 [00:06<00:04,  2.10it/s]
+BLEU: 0.6811, acc: 0.6328, no_result: 0.1191, loss: 1.0592 ||:  67%|######6   | 16/24 [00:06<00:03,  2.06it/s]
+BLEU: 0.6806, acc: 0.6287, no_result: 0.1250, loss: 1.0561 ||:  71%|#######   | 17/24 [00:07<00:03,  2.10it/s]
+BLEU: 0.6767, acc: 0.6285, no_result: 0.1250, loss: 1.0647 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.09it/s]
+BLEU: 0.6795, acc: 0.6332, no_result: 0.1217, loss: 1.0447 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.19it/s]
+BLEU: 0.6825, acc: 0.6359, no_result: 0.1203, loss: 1.0352 ||:  83%|########3 | 20/24 [00:08<00:01,  2.31it/s]
+BLEU: 0.6812, acc: 0.6280, no_result: 0.1161, loss: 1.0374 ||:  88%|########7 | 21/24 [00:09<00:01,  2.30it/s]
+BLEU: 0.6823, acc: 0.6193, no_result: 0.1122, loss: 1.0380 ||:  92%|#########1| 22/24 [00:09<00:00,  2.27it/s]
+BLEU: 0.6848, acc: 0.6277, no_result: 0.1087, loss: 1.0210 ||:  96%|#########5| 23/24 [00:09<00:00,  2.37it/s]
+BLEU: 0.6838, acc: 0.6247, no_result: 0.1088, loss: 1.0388 ||: 100%|##########| 24/24 [00:10<00:00,  2.88it/s]
+BLEU: 0.6838, acc: 0.6247, no_result: 0.1088, loss: 1.0388 ||: 100%|##########| 24/24 [00:10<00:00,  2.37it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1343 ||:   1%|          | 1/110 [00:00<01:20,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1018 ||:   2%|1         | 2/110 [00:01<01:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1127 ||:   3%|2         | 3/110 [00:01<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1123 ||:   4%|3         | 4/110 [00:02<01:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1123 ||:   5%|4         | 5/110 [00:03<01:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1283 ||:   5%|5         | 6/110 [00:03<01:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1307 ||:   6%|6         | 7/110 [00:04<01:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1293 ||:   7%|7         | 8/110 [00:05<01:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1234 ||:   8%|8         | 9/110 [00:05<01:03,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1255 ||:   9%|9         | 10/110 [00:06<01:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1300 ||:  10%|#         | 11/110 [00:07<01:01,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1306 ||:  11%|#         | 12/110 [00:07<01:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1276 ||:  12%|#1        | 13/110 [00:08<01:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1282 ||:  13%|#2        | 14/110 [00:09<01:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1256 ||:  14%|#3        | 15/110 [00:09<01:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1234 ||:  15%|#4        | 16/110 [00:10<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1244 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1226 ||:  16%|#6        | 18/110 [00:11<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1215 ||:  17%|#7        | 19/110 [00:12<00:57,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1224 ||:  18%|#8        | 20/110 [00:13<01:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1220 ||:  19%|#9        | 21/110 [00:13<01:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1236 ||:  20%|##        | 22/110 [00:14<01:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1239 ||:  21%|##        | 23/110 [00:15<00:58,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1269 ||:  22%|##1       | 24/110 [00:15<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1275 ||:  23%|##2       | 25/110 [00:16<00:55,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1285 ||:  24%|##3       | 26/110 [00:17<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1275 ||:  25%|##4       | 27/110 [00:17<00:58,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1270 ||:  25%|##5       | 28/110 [00:18<00:58,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1279 ||:  26%|##6       | 29/110 [00:19<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1277 ||:  27%|##7       | 30/110 [00:20<00:55,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1280 ||:  28%|##8       | 31/110 [00:20<00:54,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1273 ||:  29%|##9       | 32/110 [00:21<00:53,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1287 ||:  30%|###       | 33/110 [00:22<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1285 ||:  31%|###       | 34/110 [00:22<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1280 ||:  32%|###1      | 35/110 [00:23<00:49,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1280 ||:  33%|###2      | 36/110 [00:23<00:47,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1286 ||:  34%|###3      | 37/110 [00:24<00:47,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1283 ||:  35%|###4      | 38/110 [00:25<00:48,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1276 ||:  35%|###5      | 39/110 [00:26<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1287 ||:  36%|###6      | 40/110 [00:26<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1279 ||:  37%|###7      | 41/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1279 ||:  38%|###8      | 42/110 [00:28<00:46,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1277 ||:  39%|###9      | 43/110 [00:28<00:46,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1272 ||:  40%|####      | 44/110 [00:29<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1276 ||:  41%|####      | 45/110 [00:30<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1284 ||:  42%|####1     | 46/110 [00:31<00:49,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1287 ||:  43%|####2     | 47/110 [00:31<00:47,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1287 ||:  44%|####3     | 48/110 [00:32<00:45,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1289 ||:  45%|####4     | 49/110 [00:33<00:45,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1293 ||:  45%|####5     | 50/110 [00:34<00:45,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1308 ||:  46%|####6     | 51/110 [00:34<00:43,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1306 ||:  47%|####7     | 52/110 [00:35<00:41,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1316 ||:  48%|####8     | 53/110 [00:36<00:39,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1311 ||:  49%|####9     | 54/110 [00:36<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1310 ||:  50%|#####     | 55/110 [00:37<00:36,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1316 ||:  51%|#####     | 56/110 [00:37<00:34,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1328 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1325 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1331 ||:  54%|#####3    | 59/110 [00:39<00:32,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1334 ||:  55%|#####4    | 60/110 [00:40<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1342 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1349 ||:  56%|#####6    | 62/110 [00:41<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1352 ||:  57%|#####7    | 63/110 [00:42<00:30,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1348 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1350 ||:  59%|#####9    | 65/110 [00:44<00:34,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1352 ||:  60%|######    | 66/110 [00:45<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1352 ||:  61%|######    | 67/110 [00:45<00:30,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1352 ||:  62%|######1   | 68/110 [00:46<00:28,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1351 ||:  63%|######2   | 69/110 [00:46<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1350 ||:  64%|######3   | 70/110 [00:48<00:39,  1.01it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1365 ||:  65%|######4   | 71/110 [00:49<00:34,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1366 ||:  65%|######5   | 72/110 [00:50<00:31,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1367 ||:  66%|######6   | 73/110 [00:50<00:29,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1369 ||:  67%|######7   | 74/110 [00:51<00:26,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1373 ||:  68%|######8   | 75/110 [00:51<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1374 ||:  69%|######9   | 76/110 [00:52<00:24,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1366 ||:  70%|#######   | 77/110 [00:53<00:22,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1369 ||:  71%|#######   | 78/110 [00:53<00:20,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1376 ||:  72%|#######1  | 79/110 [00:54<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1377 ||:  73%|#######2  | 80/110 [00:55<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1382 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1380 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1378 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1376 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1380 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1383 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1379 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1385 ||:  80%|########  | 88/110 [01:00<00:14,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1382 ||:  81%|########  | 89/110 [01:01<00:13,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1383 ||:  82%|########1 | 90/110 [01:01<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1388 ||:  83%|########2 | 91/110 [01:02<00:12,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1382 ||:  84%|########3 | 92/110 [01:03<00:11,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1389 ||:  85%|########4 | 93/110 [01:03<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1391 ||:  85%|########5 | 94/110 [01:04<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1391 ||:  86%|########6 | 95/110 [01:05<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1392 ||:  87%|########7 | 96/110 [01:05<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1396 ||:  88%|########8 | 97/110 [01:06<00:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1397 ||:  89%|########9 | 98/110 [01:07<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1402 ||:  90%|######### | 99/110 [01:07<00:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1406 ||:  91%|######### | 100/110 [01:08<00:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1406 ||:  92%|#########1| 101/110 [01:09<00:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1407 ||:  93%|#########2| 102/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1405 ||:  94%|#########3| 103/110 [01:10<00:04,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1409 ||:  95%|#########4| 104/110 [01:11<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1411 ||:  95%|#########5| 105/110 [01:11<00:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1414 ||:  96%|#########6| 106/110 [01:12<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1416 ||:  97%|#########7| 107/110 [01:13<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1415 ||:  98%|#########8| 108/110 [01:13<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1415 ||:  99%|#########9| 109/110 [01:14<00:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1408 ||: 100%|##########| 110/110 [01:14<00:00,  1.74it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1408 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7266, acc: 0.7188, no_result: 0.0312, loss: 0.8956 ||:   4%|4         | 1/24 [00:00<00:08,  2.75it/s]
+BLEU: 0.6817, acc: 0.6719, no_result: 0.0781, loss: 0.8445 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.6635, acc: 0.6354, no_result: 0.1250, loss: 0.9668 ||:  12%|#2        | 3/24 [00:01<00:07,  2.81it/s]
+BLEU: 0.6882, acc: 0.6328, no_result: 0.1250, loss: 0.9375 ||:  17%|#6        | 4/24 [00:01<00:07,  2.72it/s]
+BLEU: 0.7156, acc: 0.6687, no_result: 0.1125, loss: 0.9159 ||:  21%|##        | 5/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7175, acc: 0.6406, no_result: 0.1094, loss: 0.9314 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7240, acc: 0.6562, no_result: 0.1027, loss: 0.9126 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7196, acc: 0.6680, no_result: 0.1016, loss: 0.9299 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7073, acc: 0.6667, no_result: 0.1007, loss: 0.9196 ||:  38%|###7      | 9/24 [00:03<00:05,  2.88it/s]
+BLEU: 0.7138, acc: 0.6500, no_result: 0.1000, loss: 0.9357 ||:  42%|####1     | 10/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.7129, acc: 0.6364, no_result: 0.1051, loss: 0.9742 ||:  46%|####5     | 11/24 [00:03<00:04,  2.68it/s]
+BLEU: 0.7071, acc: 0.6458, no_result: 0.0990, loss: 0.9758 ||:  50%|#####     | 12/24 [00:04<00:04,  2.78it/s]
+BLEU: 0.7006, acc: 0.6442, no_result: 0.1082, loss: 0.9979 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.68it/s]
+BLEU: 0.7020, acc: 0.6362, no_result: 0.1071, loss: 1.0399 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.60it/s]
+BLEU: 0.6930, acc: 0.6167, no_result: 0.1250, loss: 1.0899 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.6935, acc: 0.6074, no_result: 0.1289, loss: 1.1089 ||:  67%|######6   | 16/24 [00:06<00:03,  2.38it/s]
+BLEU: 0.6944, acc: 0.6048, no_result: 0.1379, loss: 1.1004 ||:  71%|#######   | 17/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.6920, acc: 0.6059, no_result: 0.1389, loss: 1.1069 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.6966, acc: 0.6184, no_result: 0.1332, loss: 1.0847 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.6970, acc: 0.6203, no_result: 0.1281, loss: 1.0742 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6933, acc: 0.6161, no_result: 0.1250, loss: 1.0758 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6939, acc: 0.6023, no_result: 0.1250, loss: 1.0706 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.6981, acc: 0.6101, no_result: 0.1223, loss: 1.0498 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.6969, acc: 0.6078, no_result: 0.1264, loss: 1.0652 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.6969, acc: 0.6078, no_result: 0.1264, loss: 1.0652 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1681 ||:   1%|          | 1/110 [00:00<01:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1577 ||:   2%|1         | 2/110 [00:01<01:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1395 ||:   3%|2         | 3/110 [00:02<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1247 ||:   4%|3         | 4/110 [00:02<01:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1298 ||:   5%|4         | 5/110 [00:03<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1265 ||:   5%|5         | 6/110 [00:04<01:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1358 ||:   6%|6         | 7/110 [00:04<01:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1341 ||:   7%|7         | 8/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1308 ||:   8%|8         | 9/110 [00:06<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1265 ||:   9%|9         | 10/110 [00:07<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1269 ||:  10%|#         | 11/110 [00:07<01:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1263 ||:  11%|#         | 12/110 [00:08<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1279 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1258 ||:  13%|#2        | 14/110 [00:09<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1216 ||:  14%|#3        | 15/110 [00:10<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1201 ||:  15%|#4        | 16/110 [00:10<01:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1216 ||:  15%|#5        | 17/110 [00:11<00:58,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1197 ||:  16%|#6        | 18/110 [00:12<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1217 ||:  17%|#7        | 19/110 [00:12<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1206 ||:  18%|#8        | 20/110 [00:13<00:59,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1207 ||:  19%|#9        | 21/110 [00:14<00:59,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1207 ||:  20%|##        | 22/110 [00:15<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1184 ||:  21%|##        | 23/110 [00:15<00:57,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1199 ||:  22%|##1       | 24/110 [00:16<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1223 ||:  23%|##2       | 25/110 [00:16<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1206 ||:  24%|##3       | 26/110 [00:17<00:54,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1202 ||:  25%|##4       | 27/110 [00:18<00:53,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1196 ||:  25%|##5       | 28/110 [00:18<00:55,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1201 ||:  26%|##6       | 29/110 [00:19<00:54,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1194 ||:  27%|##7       | 30/110 [00:20<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1185 ||:  28%|##8       | 31/110 [00:20<00:49,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1176 ||:  29%|##9       | 32/110 [00:21<00:49,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1167 ||:  30%|###       | 33/110 [00:22<00:48,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1167 ||:  31%|###       | 34/110 [00:22<00:50,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1175 ||:  32%|###1      | 35/110 [00:23<00:47,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1177 ||:  33%|###2      | 36/110 [00:24<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1176 ||:  34%|###3      | 37/110 [00:24<00:45,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1168 ||:  35%|###4      | 38/110 [00:25<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1175 ||:  35%|###5      | 39/110 [00:26<00:47,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1171 ||:  36%|###6      | 40/110 [00:26<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1166 ||:  37%|###7      | 41/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1176 ||:  38%|###8      | 42/110 [00:28<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1182 ||:  39%|###9      | 43/110 [00:28<00:42,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1175 ||:  40%|####      | 44/110 [00:29<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1177 ||:  41%|####      | 45/110 [00:29<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1179 ||:  42%|####1     | 46/110 [00:30<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1181 ||:  43%|####2     | 47/110 [00:31<00:42,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1181 ||:  44%|####3     | 48/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1186 ||:  45%|####4     | 49/110 [00:32<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1182 ||:  45%|####5     | 50/110 [00:33<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1178 ||:  46%|####6     | 51/110 [00:34<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1189 ||:  47%|####7     | 52/110 [00:34<00:37,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1202 ||:  48%|####8     | 53/110 [00:35<00:36,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1199 ||:  49%|####9     | 54/110 [00:35<00:35,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1200 ||:  50%|#####     | 55/110 [00:36<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1204 ||:  51%|#####     | 56/110 [00:37<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1205 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1204 ||:  53%|#####2    | 58/110 [00:38<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1207 ||:  54%|#####3    | 59/110 [00:39<00:32,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1204 ||:  55%|#####4    | 60/110 [00:40<00:44,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1199 ||:  55%|#####5    | 61/110 [00:41<00:39,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1203 ||:  56%|#####6    | 62/110 [00:42<00:37,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1204 ||:  57%|#####7    | 63/110 [00:42<00:35,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1208 ||:  58%|#####8    | 64/110 [00:43<00:33,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1205 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1209 ||:  60%|######    | 66/110 [00:44<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1207 ||:  61%|######    | 67/110 [00:45<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1203 ||:  62%|######1   | 68/110 [00:45<00:27,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1201 ||:  63%|######2   | 69/110 [00:46<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1204 ||:  64%|######3   | 70/110 [00:47<00:27,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1212 ||:  65%|######4   | 71/110 [00:48<00:28,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1211 ||:  65%|######5   | 72/110 [00:48<00:27,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1216 ||:  66%|######6   | 73/110 [00:49<00:25,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1214 ||:  67%|######7   | 74/110 [00:50<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1215 ||:  68%|######8   | 75/110 [00:50<00:23,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1216 ||:  69%|######9   | 76/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1215 ||:  70%|#######   | 77/110 [00:52<00:22,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1215 ||:  71%|#######   | 78/110 [00:52<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1212 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1210 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1205 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1208 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1206 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1203 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1203 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1212 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1209 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1206 ||:  80%|########  | 88/110 [00:59<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1200 ||:  81%|########  | 89/110 [01:00<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1205 ||:  82%|########1 | 90/110 [01:01<00:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1205 ||:  83%|########2 | 91/110 [01:01<00:13,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1212 ||:  84%|########3 | 92/110 [01:02<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1212 ||:  85%|########4 | 93/110 [01:02<00:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1214 ||:  85%|########5 | 94/110 [01:03<00:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1213 ||:  86%|########6 | 95/110 [01:04<00:09,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1209 ||:  87%|########7 | 96/110 [01:04<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1206 ||:  88%|########8 | 97/110 [01:05<00:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1200 ||:  89%|########9 | 98/110 [01:06<00:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1198 ||:  90%|######### | 99/110 [01:07<00:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1196 ||:  91%|######### | 100/110 [01:07<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1194 ||:  92%|#########1| 101/110 [01:08<00:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1192 ||:  93%|#########2| 102/110 [01:09<00:06,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1192 ||:  94%|#########3| 103/110 [01:10<00:05,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1191 ||:  95%|#########4| 104/110 [01:10<00:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1194 ||:  95%|#########5| 105/110 [01:11<00:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1196 ||:  96%|#########6| 106/110 [01:12<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1195 ||:  97%|#########7| 107/110 [01:12<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1193 ||:  98%|#########8| 108/110 [01:13<00:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1191 ||:  99%|#########9| 109/110 [01:14<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1198 ||: 100%|##########| 110/110 [01:14<00:00,  1.79it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1198 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7258, acc: 0.6875, no_result: 0.0625, loss: 0.9641 ||:   4%|4         | 1/24 [00:00<00:08,  2.65it/s]
+BLEU: 0.6768, acc: 0.6875, no_result: 0.0781, loss: 0.8573 ||:   8%|8         | 2/24 [00:00<00:07,  2.77it/s]
+BLEU: 0.6651, acc: 0.6667, no_result: 0.1042, loss: 0.9562 ||:  12%|#2        | 3/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.6880, acc: 0.6641, no_result: 0.1016, loss: 0.9278 ||:  17%|#6        | 4/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7165, acc: 0.6813, no_result: 0.1062, loss: 0.9093 ||:  21%|##        | 5/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7104, acc: 0.6562, no_result: 0.0938, loss: 0.9058 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7180, acc: 0.6696, no_result: 0.0893, loss: 0.8881 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7156, acc: 0.6797, no_result: 0.0938, loss: 0.9008 ||:  33%|###3      | 8/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.6994, acc: 0.6806, no_result: 0.1007, loss: 0.8949 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7076, acc: 0.6656, no_result: 0.0938, loss: 0.9069 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.7108, acc: 0.6534, no_result: 0.0966, loss: 0.9468 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7065, acc: 0.6510, no_result: 0.0964, loss: 0.9464 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7014, acc: 0.6490, no_result: 0.0986, loss: 0.9737 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.7023, acc: 0.6406, no_result: 0.1004, loss: 1.0153 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.6966, acc: 0.6312, no_result: 0.1187, loss: 1.0710 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.6938, acc: 0.6211, no_result: 0.1250, loss: 1.0930 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.6927, acc: 0.6158, no_result: 0.1342, loss: 1.0876 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6924, acc: 0.6128, no_result: 0.1372, loss: 1.0949 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6961, acc: 0.6217, no_result: 0.1332, loss: 1.0722 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.6981, acc: 0.6266, no_result: 0.1297, loss: 1.0631 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6968, acc: 0.6250, no_result: 0.1265, loss: 1.0632 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7024, acc: 0.6179, no_result: 0.1236, loss: 1.0607 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.7053, acc: 0.6277, no_result: 0.1182, loss: 1.0432 ||:  96%|#########5| 23/24 [00:08<00:00,  2.75it/s]
+BLEU: 0.7046, acc: 0.6247, no_result: 0.1179, loss: 1.0616 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.7046, acc: 0.6247, no_result: 0.1179, loss: 1.0616 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1133 ||:   1%|          | 1/110 [00:00<01:26,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.1057 ||:   2%|1         | 2/110 [00:01<01:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0993 ||:   3%|2         | 3/110 [00:02<01:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0939 ||:   4%|3         | 4/110 [00:02<01:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0899 ||:   5%|4         | 5/110 [00:03<01:06,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0912 ||:   5%|5         | 6/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0870 ||:   6%|6         | 7/110 [00:04<01:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0930 ||:   7%|7         | 8/110 [00:05<01:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0914 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0908 ||:   9%|9         | 10/110 [00:06<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0935 ||:  10%|#         | 11/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0934 ||:  11%|#         | 12/110 [00:08<01:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0932 ||:  12%|#1        | 13/110 [00:08<01:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0908 ||:  13%|#2        | 14/110 [00:09<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0898 ||:  14%|#3        | 15/110 [00:10<01:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0910 ||:  15%|#4        | 16/110 [00:10<01:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0901 ||:  15%|#5        | 17/110 [00:11<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0890 ||:  16%|#6        | 18/110 [00:12<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0905 ||:  17%|#7        | 19/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0900 ||:  18%|#8        | 20/110 [00:13<01:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0914 ||:  19%|#9        | 21/110 [00:14<01:04,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0918 ||:  20%|##        | 22/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0923 ||:  21%|##        | 23/110 [00:15<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0940 ||:  22%|##1       | 24/110 [00:16<01:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0944 ||:  23%|##2       | 25/110 [00:17<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0933 ||:  24%|##3       | 26/110 [00:17<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0918 ||:  25%|##4       | 27/110 [00:18<00:59,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0909 ||:  25%|##5       | 28/110 [00:19<00:56,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0916 ||:  26%|##6       | 29/110 [00:19<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0921 ||:  27%|##7       | 30/110 [00:20<00:54,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0929 ||:  28%|##8       | 31/110 [00:21<00:52,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0934 ||:  29%|##9       | 32/110 [00:21<00:51,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0921 ||:  30%|###       | 33/110 [00:22<00:48,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0924 ||:  31%|###       | 34/110 [00:23<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0931 ||:  32%|###1      | 35/110 [00:23<00:50,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0930 ||:  33%|###2      | 36/110 [00:24<00:50,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0932 ||:  34%|###3      | 37/110 [00:25<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0927 ||:  35%|###4      | 38/110 [00:25<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0917 ||:  35%|###5      | 39/110 [00:26<00:48,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0921 ||:  36%|###6      | 40/110 [00:27<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0923 ||:  37%|###7      | 41/110 [00:27<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0924 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0918 ||:  39%|###9      | 43/110 [00:29<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0913 ||:  40%|####      | 44/110 [00:29<00:44,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0910 ||:  41%|####      | 45/110 [00:30<00:42,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0907 ||:  42%|####1     | 46/110 [00:31<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0904 ||:  43%|####2     | 47/110 [00:31<00:41,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0911 ||:  44%|####3     | 48/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0915 ||:  45%|####4     | 49/110 [00:33<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0912 ||:  45%|####5     | 50/110 [00:34<00:55,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0924 ||:  46%|####6     | 51/110 [00:35<00:52,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0929 ||:  47%|####7     | 52/110 [00:36<00:47,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0927 ||:  48%|####8     | 53/110 [00:36<00:43,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0930 ||:  49%|####9     | 54/110 [00:37<00:39,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0924 ||:  50%|#####     | 55/110 [00:38<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0921 ||:  51%|#####     | 56/110 [00:38<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0921 ||:  52%|#####1    | 57/110 [00:39<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0915 ||:  53%|#####2    | 58/110 [00:40<00:39,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0917 ||:  54%|#####3    | 59/110 [00:40<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0919 ||:  55%|#####4    | 60/110 [00:41<00:35,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0922 ||:  55%|#####5    | 61/110 [00:42<00:35,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0921 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0924 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0923 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0929 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0927 ||:  60%|######    | 66/110 [00:45<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0920 ||:  61%|######    | 67/110 [00:46<00:27,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0925 ||:  62%|######1   | 68/110 [00:46<00:26,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0924 ||:  63%|######2   | 69/110 [00:47<00:26,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0925 ||:  64%|######3   | 70/110 [00:48<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0934 ||:  65%|######4   | 71/110 [00:48<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0933 ||:  65%|######5   | 72/110 [00:49<00:26,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0938 ||:  66%|######6   | 73/110 [00:50<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0939 ||:  67%|######7   | 74/110 [00:51<00:25,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0939 ||:  68%|######8   | 75/110 [00:51<00:24,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0941 ||:  69%|######9   | 76/110 [00:52<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0945 ||:  70%|#######   | 77/110 [00:53<00:25,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0947 ||:  71%|#######   | 78/110 [00:53<00:22,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0949 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  73%|#######2  | 80/110 [00:55<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0956 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0952 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0955 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0958 ||:  76%|#######6  | 84/110 [00:57<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0955 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0950 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0950 ||:  80%|########  | 88/110 [01:00<00:14,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0953 ||:  81%|########  | 89/110 [01:01<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  82%|########1 | 90/110 [01:02<00:14,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0952 ||:  83%|########2 | 91/110 [01:02<00:14,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  84%|########3 | 92/110 [01:03<00:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  85%|########4 | 93/110 [01:04<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  85%|########5 | 94/110 [01:04<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0952 ||:  86%|########6 | 95/110 [01:05<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  87%|########7 | 96/110 [01:05<00:08,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0951 ||:  88%|########8 | 97/110 [01:06<00:08,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0951 ||:  89%|########9 | 98/110 [01:07<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0955 ||:  90%|######### | 99/110 [01:08<00:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0955 ||:  91%|######### | 100/110 [01:08<00:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  92%|#########1| 101/110 [01:09<00:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0956 ||:  93%|#########2| 102/110 [01:09<00:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0956 ||:  94%|#########3| 103/110 [01:10<00:04,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0954 ||:  95%|#########4| 104/110 [01:11<00:03,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0963 ||:  95%|#########5| 105/110 [01:12<00:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0963 ||:  96%|#########6| 106/110 [01:12<00:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0964 ||:  97%|#########7| 107/110 [01:13<00:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0963 ||:  98%|#########8| 108/110 [01:13<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0963 ||:  99%|#########9| 109/110 [01:14<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0961 ||: 100%|##########| 110/110 [01:14<00:00,  1.78it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0961 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6941, acc: 0.6250, no_result: 0.0312, loss: 1.0173 ||:   4%|4         | 1/24 [00:00<00:08,  2.78it/s]
+BLEU: 0.6464, acc: 0.6562, no_result: 0.0781, loss: 0.9362 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.6518, acc: 0.6250, no_result: 0.1042, loss: 1.0218 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.6752, acc: 0.6406, no_result: 0.1016, loss: 0.9781 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7092, acc: 0.6625, no_result: 0.1000, loss: 0.9574 ||:  21%|##        | 5/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7052, acc: 0.6406, no_result: 0.0885, loss: 0.9574 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.7106, acc: 0.6562, no_result: 0.0893, loss: 0.9482 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7114, acc: 0.6641, no_result: 0.0898, loss: 0.9573 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7044, acc: 0.6701, no_result: 0.1007, loss: 0.9475 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7128, acc: 0.6562, no_result: 0.1031, loss: 0.9606 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7116, acc: 0.6477, no_result: 0.0966, loss: 1.0049 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7076, acc: 0.6458, no_result: 0.0964, loss: 1.0138 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7019, acc: 0.6466, no_result: 0.0986, loss: 1.0334 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7032, acc: 0.6295, no_result: 0.0960, loss: 1.0744 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.6999, acc: 0.6208, no_result: 0.1146, loss: 1.1241 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.7000, acc: 0.6152, no_result: 0.1191, loss: 1.1442 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.6979, acc: 0.6103, no_result: 0.1232, loss: 1.1373 ||:  71%|#######   | 17/24 [00:06<00:02,  2.48it/s]
+BLEU: 0.6973, acc: 0.6076, no_result: 0.1285, loss: 1.1471 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6998, acc: 0.6118, no_result: 0.1266, loss: 1.1262 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7016, acc: 0.6188, no_result: 0.1234, loss: 1.1159 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6982, acc: 0.6146, no_result: 0.1190, loss: 1.1155 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7018, acc: 0.6065, no_result: 0.1193, loss: 1.1123 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7058, acc: 0.6155, no_result: 0.1155, loss: 1.0933 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7048, acc: 0.6130, no_result: 0.1153, loss: 1.1123 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7048, acc: 0.6130, no_result: 0.1153, loss: 1.1123 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0598 ||:   1%|          | 1/110 [00:00<01:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0589 ||:   2%|1         | 2/110 [00:01<01:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0590 ||:   3%|2         | 3/110 [00:01<01:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0680 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0725 ||:   5%|4         | 5/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0723 ||:   5%|5         | 6/110 [00:03<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:   6%|6         | 7/110 [00:04<01:04,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0720 ||:   7%|7         | 8/110 [00:05<01:03,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0768 ||:   8%|8         | 9/110 [00:05<01:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0768 ||:   9%|9         | 10/110 [00:06<01:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0793 ||:  10%|#         | 11/110 [00:07<01:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0800 ||:  11%|#         | 12/110 [00:07<01:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0800 ||:  12%|#1        | 13/110 [00:08<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0789 ||:  13%|#2        | 14/110 [00:09<01:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  14%|#3        | 15/110 [00:09<01:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0783 ||:  15%|#4        | 16/110 [00:10<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  15%|#5        | 17/110 [00:11<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  16%|#6        | 18/110 [00:11<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0812 ||:  17%|#7        | 19/110 [00:12<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0802 ||:  18%|#8        | 20/110 [00:13<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  19%|#9        | 21/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  20%|##        | 22/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  21%|##        | 23/110 [00:15<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0812 ||:  22%|##1       | 24/110 [00:16<01:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0809 ||:  23%|##2       | 25/110 [00:16<00:59,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0806 ||:  24%|##3       | 26/110 [00:17<00:58,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  25%|##4       | 27/110 [00:17<00:54,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  25%|##5       | 28/110 [00:18<00:58,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  26%|##6       | 29/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0822 ||:  27%|##7       | 30/110 [00:20<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0817 ||:  28%|##8       | 31/110 [00:20<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0812 ||:  29%|##9       | 32/110 [00:21<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0822 ||:  30%|###       | 33/110 [00:22<00:52,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0832 ||:  31%|###       | 34/110 [00:22<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0841 ||:  32%|###1      | 35/110 [00:23<00:56,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  33%|###2      | 36/110 [00:24<00:54,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0850 ||:  34%|###3      | 37/110 [00:25<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0845 ||:  35%|###4      | 38/110 [00:25<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0840 ||:  35%|###5      | 39/110 [00:26<00:48,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0835 ||:  36%|###6      | 40/110 [00:28<01:07,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0841 ||:  37%|###7      | 41/110 [00:28<01:00,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0839 ||:  38%|###8      | 42/110 [00:29<00:54,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0836 ||:  39%|###9      | 43/110 [00:30<00:52,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0842 ||:  40%|####      | 44/110 [00:30<00:49,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0846 ||:  41%|####      | 45/110 [00:31<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0845 ||:  42%|####1     | 46/110 [00:32<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0840 ||:  43%|####2     | 47/110 [00:32<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0840 ||:  44%|####3     | 48/110 [00:33<00:40,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0852 ||:  45%|####4     | 49/110 [00:33<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0851 ||:  45%|####5     | 50/110 [00:34<00:42,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  46%|####6     | 51/110 [00:35<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0853 ||:  47%|####7     | 52/110 [00:36<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0852 ||:  48%|####8     | 53/110 [00:36<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0846 ||:  49%|####9     | 54/110 [00:37<00:36,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0845 ||:  50%|#####     | 55/110 [00:37<00:34,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0845 ||:  51%|#####     | 56/110 [00:38<00:33,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0843 ||:  52%|#####1    | 57/110 [00:39<00:33,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0845 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0849 ||:  54%|#####3    | 59/110 [00:40<00:37,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0849 ||:  55%|#####4    | 60/110 [00:41<00:35,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0853 ||:  55%|#####5    | 61/110 [00:42<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0849 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  57%|#####7    | 63/110 [00:43<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0845 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  60%|######    | 66/110 [00:45<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0849 ||:  61%|######    | 67/110 [00:46<00:28,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0852 ||:  62%|######1   | 68/110 [00:46<00:28,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  63%|######2   | 69/110 [00:47<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0851 ||:  64%|######3   | 70/110 [00:48<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0856 ||:  65%|######4   | 71/110 [00:48<00:24,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0854 ||:  65%|######5   | 72/110 [00:49<00:23,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  66%|######6   | 73/110 [00:49<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0847 ||:  67%|######7   | 74/110 [00:50<00:23,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0846 ||:  68%|######8   | 75/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0843 ||:  69%|######9   | 76/110 [00:52<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0847 ||:  70%|#######   | 77/110 [00:52<00:22,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0844 ||:  71%|#######   | 78/110 [00:53<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0847 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0849 ||:  73%|#######2  | 80/110 [00:54<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0853 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0853 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0857 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0856 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0857 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0855 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0855 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0858 ||:  80%|########  | 88/110 [01:00<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0857 ||:  81%|########  | 89/110 [01:01<00:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0863 ||:  82%|########1 | 90/110 [01:01<00:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0863 ||:  83%|########2 | 91/110 [01:02<00:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0865 ||:  84%|########3 | 92/110 [01:03<00:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0862 ||:  85%|########4 | 93/110 [01:03<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0861 ||:  85%|########5 | 94/110 [01:04<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0864 ||:  86%|########6 | 95/110 [01:05<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0865 ||:  87%|########7 | 96/110 [01:05<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0866 ||:  88%|########8 | 97/110 [01:06<00:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0868 ||:  89%|########9 | 98/110 [01:07<00:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0866 ||:  90%|######### | 99/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0867 ||:  91%|######### | 100/110 [01:08<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0864 ||:  92%|#########1| 101/110 [01:08<00:05,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0867 ||:  93%|#########2| 102/110 [01:09<00:05,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0867 ||:  94%|#########3| 103/110 [01:10<00:04,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0865 ||:  95%|#########4| 104/110 [01:10<00:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0863 ||:  95%|#########5| 105/110 [01:11<00:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0862 ||:  96%|#########6| 106/110 [01:12<00:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0866 ||:  97%|#########7| 107/110 [01:12<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0867 ||:  98%|#########8| 108/110 [01:13<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0870 ||:  99%|#########9| 109/110 [01:14<00:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0874 ||: 100%|##########| 110/110 [01:14<00:00,  1.72it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0874 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6941, acc: 0.6562, no_result: 0.0312, loss: 1.0161 ||:   4%|4         | 1/24 [00:00<00:08,  2.81it/s]
+BLEU: 0.6848, acc: 0.7031, no_result: 0.0625, loss: 0.9198 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.6489, acc: 0.6562, no_result: 0.0938, loss: 1.0285 ||:  12%|#2        | 3/24 [00:01<00:07,  2.86it/s]
+BLEU: 0.6767, acc: 0.6484, no_result: 0.1016, loss: 0.9893 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7020, acc: 0.6625, no_result: 0.1062, loss: 0.9611 ||:  21%|##        | 5/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.6990, acc: 0.6562, no_result: 0.1042, loss: 0.9568 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7045, acc: 0.6696, no_result: 0.1027, loss: 0.9442 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7035, acc: 0.6797, no_result: 0.1016, loss: 0.9585 ||:  33%|###3      | 8/24 [00:02<00:05,  2.96it/s]
+BLEU: 0.6937, acc: 0.6910, no_result: 0.0972, loss: 0.9500 ||:  38%|###7      | 9/24 [00:03<00:05,  2.91it/s]
+BLEU: 0.7000, acc: 0.6844, no_result: 0.0969, loss: 0.9610 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7005, acc: 0.6761, no_result: 0.0938, loss: 1.0042 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.6985, acc: 0.6745, no_result: 0.0911, loss: 1.0101 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.6946, acc: 0.6755, no_result: 0.0938, loss: 1.0307 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.6959, acc: 0.6652, no_result: 0.0938, loss: 1.0708 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.6862, acc: 0.6479, no_result: 0.1125, loss: 1.1256 ||:  62%|######2   | 15/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.6859, acc: 0.6426, no_result: 0.1172, loss: 1.1507 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.6857, acc: 0.6324, no_result: 0.1287, loss: 1.1456 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6841, acc: 0.6302, no_result: 0.1302, loss: 1.1529 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6881, acc: 0.6398, no_result: 0.1266, loss: 1.1310 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.6916, acc: 0.6453, no_result: 0.1234, loss: 1.1193 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6891, acc: 0.6429, no_result: 0.1190, loss: 1.1207 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.6942, acc: 0.6378, no_result: 0.1193, loss: 1.1212 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.6982, acc: 0.6454, no_result: 0.1155, loss: 1.1026 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.6984, acc: 0.6416, no_result: 0.1153, loss: 1.1212 ||: 100%|##########| 24/24 [00:08<00:00,  3.39it/s]
+BLEU: 0.6984, acc: 0.6416, no_result: 0.1153, loss: 1.1212 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:   1%|          | 1/110 [00:00<01:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0708 ||:   2%|1         | 2/110 [00:01<01:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0721 ||:   3%|2         | 3/110 [00:02<01:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0684 ||:   4%|3         | 4/110 [00:02<01:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0704 ||:   5%|4         | 5/110 [00:03<01:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0713 ||:   5%|5         | 6/110 [00:04<01:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:   6%|6         | 7/110 [00:04<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0809 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0814 ||:   8%|8         | 9/110 [00:06<01:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0775 ||:   9%|9         | 10/110 [00:06<01:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0763 ||:  10%|#         | 11/110 [00:07<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0759 ||:  11%|#         | 12/110 [00:08<01:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0780 ||:  12%|#1        | 13/110 [00:08<01:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0765 ||:  13%|#2        | 14/110 [00:09<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  14%|#3        | 15/110 [00:10<01:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0768 ||:  15%|#4        | 16/110 [00:10<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0757 ||:  15%|#5        | 17/110 [00:11<01:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0775 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0770 ||:  17%|#7        | 19/110 [00:12<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  18%|#8        | 20/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  19%|#9        | 21/110 [00:14<00:58,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0737 ||:  20%|##        | 22/110 [00:14<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  21%|##        | 23/110 [00:15<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  22%|##1       | 24/110 [00:16<00:55,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0754 ||:  23%|##2       | 25/110 [00:16<00:57,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  24%|##3       | 26/110 [00:17<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:  25%|##4       | 27/110 [00:18<00:55,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0734 ||:  25%|##5       | 28/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  26%|##6       | 29/110 [00:19<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  27%|##7       | 30/110 [00:20<01:09,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0751 ||:  28%|##8       | 31/110 [00:21<01:04,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  29%|##9       | 32/110 [00:22<00:59,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0754 ||:  30%|###       | 33/110 [00:22<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0754 ||:  31%|###       | 34/110 [00:23<00:52,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  32%|###1      | 35/110 [00:24<00:50,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  33%|###2      | 36/110 [00:24<00:51,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  34%|###3      | 37/110 [00:25<00:49,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  35%|###4      | 38/110 [00:26<00:47,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:  35%|###5      | 39/110 [00:26<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  36%|###6      | 40/110 [00:27<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  37%|###7      | 41/110 [00:28<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0732 ||:  38%|###8      | 42/110 [00:28<00:43,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  39%|###9      | 43/110 [00:29<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  40%|####      | 44/110 [00:30<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0734 ||:  41%|####      | 45/110 [00:30<00:44,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0736 ||:  42%|####1     | 46/110 [00:31<00:45,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  43%|####2     | 47/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  44%|####3     | 48/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  45%|####4     | 49/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0745 ||:  45%|####5     | 50/110 [00:34<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0757 ||:  46%|####6     | 51/110 [00:34<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0771 ||:  47%|####7     | 52/110 [00:35<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:  48%|####8     | 53/110 [00:36<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:  49%|####9     | 54/110 [00:36<00:36,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0770 ||:  50%|#####     | 55/110 [00:37<00:37,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0769 ||:  51%|#####     | 56/110 [00:38<00:37,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0767 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0767 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0773 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0777 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0779 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0783 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0786 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0789 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0790 ||:  60%|######    | 66/110 [00:44<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0788 ||:  61%|######    | 67/110 [00:45<00:28,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  62%|######1   | 68/110 [00:46<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0790 ||:  63%|######2   | 69/110 [00:46<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0791 ||:  64%|######3   | 70/110 [00:47<00:29,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  65%|######4   | 71/110 [00:48<00:28,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0789 ||:  65%|######5   | 72/110 [00:49<00:27,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0788 ||:  66%|######6   | 73/110 [00:49<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0785 ||:  67%|######7   | 74/110 [00:50<00:26,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0788 ||:  68%|######8   | 75/110 [00:51<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0785 ||:  69%|######9   | 76/110 [00:52<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0786 ||:  70%|#######   | 77/110 [00:52<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0785 ||:  71%|#######   | 78/110 [00:53<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0790 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0800 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0806 ||:  74%|#######3  | 81/110 [00:55<00:17,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  75%|#######4  | 82/110 [00:55<00:17,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0815 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0816 ||:  76%|#######6  | 84/110 [00:57<00:16,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0815 ||:  77%|#######7  | 85/110 [00:57<00:15,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0817 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0815 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  80%|########  | 88/110 [00:59<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0816 ||:  81%|########  | 89/110 [01:00<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0818 ||:  82%|########1 | 90/110 [01:01<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0819 ||:  83%|########2 | 91/110 [01:02<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0822 ||:  84%|########3 | 92/110 [01:02<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  85%|########4 | 93/110 [01:03<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0817 ||:  85%|########5 | 94/110 [01:04<00:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0821 ||:  86%|########6 | 95/110 [01:04<00:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0818 ||:  87%|########7 | 96/110 [01:05<00:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  88%|########8 | 97/110 [01:06<00:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0821 ||:  89%|########9 | 98/110 [01:07<00:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  90%|######### | 99/110 [01:07<00:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  91%|######### | 100/110 [01:08<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0821 ||:  92%|#########1| 101/110 [01:09<00:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  93%|#########2| 102/110 [01:09<00:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0823 ||:  94%|#########3| 103/110 [01:10<00:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  95%|#########4| 104/110 [01:11<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0822 ||:  95%|#########5| 105/110 [01:11<00:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  96%|#########6| 106/110 [01:12<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0821 ||:  97%|#########7| 107/110 [01:13<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0822 ||:  98%|#########8| 108/110 [01:13<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0820 ||:  99%|#########9| 109/110 [01:14<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0821 ||: 100%|##########| 110/110 [01:14<00:00,  1.77it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0821 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6986, acc: 0.7188, no_result: 0.0312, loss: 0.9725 ||:   4%|4         | 1/24 [00:00<00:08,  2.78it/s]
+BLEU: 0.6650, acc: 0.6562, no_result: 0.0781, loss: 0.9460 ||:   8%|8         | 2/24 [00:00<00:07,  2.84it/s]
+BLEU: 0.6445, acc: 0.6354, no_result: 0.1042, loss: 1.0606 ||:  12%|#2        | 3/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.6827, acc: 0.6484, no_result: 0.0938, loss: 0.9983 ||:  17%|#6        | 4/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.7099, acc: 0.6687, no_result: 0.0938, loss: 0.9676 ||:  21%|##        | 5/24 [00:01<00:06,  2.73it/s]
+BLEU: 0.7058, acc: 0.6562, no_result: 0.0833, loss: 0.9662 ||:  25%|##5       | 6/24 [00:02<00:06,  2.82it/s]
+BLEU: 0.7158, acc: 0.6696, no_result: 0.0804, loss: 0.9497 ||:  29%|##9       | 7/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7127, acc: 0.6758, no_result: 0.0820, loss: 0.9657 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7044, acc: 0.6840, no_result: 0.0868, loss: 0.9593 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7135, acc: 0.6750, no_result: 0.0781, loss: 0.9725 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7133, acc: 0.6676, no_result: 0.0824, loss: 1.0147 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7102, acc: 0.6745, no_result: 0.0781, loss: 1.0247 ||:  50%|#####     | 12/24 [00:04<00:04,  2.84it/s]
+BLEU: 0.7081, acc: 0.6731, no_result: 0.0841, loss: 1.0495 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.7089, acc: 0.6607, no_result: 0.0826, loss: 1.0945 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.64it/s]
+BLEU: 0.6942, acc: 0.6417, no_result: 0.1021, loss: 1.1495 ||:  62%|######2   | 15/24 [00:05<00:03,  2.45it/s]
+BLEU: 0.6863, acc: 0.6328, no_result: 0.1055, loss: 1.1714 ||:  67%|######6   | 16/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.6841, acc: 0.6250, no_result: 0.1158, loss: 1.1646 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.6830, acc: 0.6198, no_result: 0.1215, loss: 1.1755 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6870, acc: 0.6316, no_result: 0.1184, loss: 1.1530 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.55it/s]
+BLEU: 0.6897, acc: 0.6359, no_result: 0.1156, loss: 1.1401 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.6865, acc: 0.6310, no_result: 0.1146, loss: 1.1442 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.6885, acc: 0.6236, no_result: 0.1136, loss: 1.1389 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.6927, acc: 0.6332, no_result: 0.1087, loss: 1.1180 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.6930, acc: 0.6345, no_result: 0.1042, loss: 1.1338 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.6930, acc: 0.6345, no_result: 0.1042, loss: 1.1338 ||: 100%|##########| 24/24 [00:08<00:00,  2.77it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0656 ||:   1%|          | 1/110 [00:00<01:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0767 ||:   2%|1         | 2/110 [00:01<01:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0678 ||:   3%|2         | 3/110 [00:02<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:   4%|3         | 4/110 [00:02<01:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0696 ||:   5%|4         | 5/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0669 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0727 ||:   6%|6         | 7/110 [00:04<01:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:   7%|7         | 8/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0753 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:  10%|#         | 11/110 [00:07<01:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  11%|#         | 12/110 [00:08<01:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0781 ||:  12%|#1        | 13/110 [00:08<01:01,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  13%|#2        | 14/110 [00:09<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  14%|#3        | 15/110 [00:10<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  15%|#4        | 16/110 [00:10<01:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  15%|#5        | 17/110 [00:11<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0733 ||:  16%|#6        | 18/110 [00:11<00:58,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0722 ||:  17%|#7        | 19/110 [00:12<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0709 ||:  18%|#8        | 20/110 [00:13<01:18,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0712 ||:  19%|#9        | 21/110 [00:14<01:13,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0719 ||:  20%|##        | 22/110 [00:15<01:07,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0722 ||:  21%|##        | 23/110 [00:15<01:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0716 ||:  22%|##1       | 24/110 [00:16<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0711 ||:  23%|##2       | 25/110 [00:17<00:58,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0703 ||:  24%|##3       | 26/110 [00:17<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0701 ||:  25%|##4       | 27/110 [00:18<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0700 ||:  25%|##5       | 28/110 [00:19<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0701 ||:  26%|##6       | 29/110 [00:20<00:57,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0694 ||:  27%|##7       | 30/110 [00:20<00:53,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0698 ||:  28%|##8       | 31/110 [00:21<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0700 ||:  29%|##9       | 32/110 [00:21<00:49,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0706 ||:  30%|###       | 33/110 [00:22<00:50,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0711 ||:  31%|###       | 34/110 [00:23<00:48,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0719 ||:  32%|###1      | 35/110 [00:23<00:47,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0717 ||:  33%|###2      | 36/110 [00:24<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  34%|###3      | 37/110 [00:25<00:46,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0745 ||:  35%|###4      | 38/110 [00:25<00:45,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0746 ||:  35%|###5      | 39/110 [00:26<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  36%|###6      | 40/110 [00:27<00:46,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  37%|###7      | 41/110 [00:27<00:45,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0741 ||:  38%|###8      | 42/110 [00:28<00:50,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  39%|###9      | 43/110 [00:29<00:46,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0739 ||:  40%|####      | 44/110 [00:29<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  41%|####      | 45/110 [00:30<00:43,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  42%|####1     | 46/110 [00:31<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0754 ||:  43%|####2     | 47/110 [00:31<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  44%|####3     | 48/110 [00:32<00:45,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0746 ||:  45%|####4     | 49/110 [00:33<00:44,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0745 ||:  45%|####5     | 50/110 [00:34<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0742 ||:  46%|####6     | 51/110 [00:34<00:42,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0739 ||:  47%|####7     | 52/110 [00:35<00:43,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  48%|####8     | 53/110 [00:36<00:44,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0746 ||:  49%|####9     | 54/110 [00:37<00:42,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  50%|#####     | 55/110 [00:37<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0747 ||:  51%|#####     | 56/110 [00:38<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0746 ||:  52%|#####1    | 57/110 [00:39<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:  53%|#####2    | 58/110 [00:40<00:38,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  55%|#####4    | 60/110 [00:41<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  55%|#####5    | 61/110 [00:42<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0753 ||:  56%|#####6    | 62/110 [00:42<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:  57%|#####7    | 63/110 [00:43<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0760 ||:  58%|#####8    | 64/110 [00:44<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0763 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  60%|######    | 66/110 [00:45<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0759 ||:  61%|######    | 67/110 [00:46<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  62%|######1   | 68/110 [00:46<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  63%|######2   | 69/110 [00:47<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  64%|######3   | 70/110 [00:48<00:26,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0751 ||:  65%|######4   | 71/110 [00:49<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  65%|######5   | 72/110 [00:49<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  66%|######6   | 73/110 [00:50<00:26,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  67%|######7   | 74/110 [00:51<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  68%|######8   | 75/110 [00:51<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  69%|######9   | 76/110 [00:52<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:  70%|#######   | 77/110 [00:53<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:  71%|#######   | 78/110 [00:53<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0754 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0759 ||:  73%|#######2  | 80/110 [00:55<00:19,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0762 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0760 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0761 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0760 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  80%|########  | 88/110 [01:00<00:14,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0753 ||:  81%|########  | 89/110 [01:01<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  82%|########1 | 90/110 [01:01<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0759 ||:  83%|########2 | 91/110 [01:02<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  84%|########3 | 92/110 [01:03<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0761 ||:  85%|########4 | 93/110 [01:03<00:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0762 ||:  85%|########5 | 94/110 [01:04<00:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0762 ||:  86%|########6 | 95/110 [01:05<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:  87%|########7 | 96/110 [01:05<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0763 ||:  88%|########8 | 97/110 [01:06<00:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:  89%|########9 | 98/110 [01:06<00:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  90%|######### | 99/110 [01:07<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  91%|######### | 100/110 [01:08<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0765 ||:  92%|#########1| 101/110 [01:08<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0763 ||:  93%|#########2| 102/110 [01:09<00:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  94%|#########3| 103/110 [01:10<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  95%|#########4| 104/110 [01:10<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  95%|#########5| 105/110 [01:11<00:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0765 ||:  96%|#########6| 106/110 [01:12<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0767 ||:  97%|#########7| 107/110 [01:12<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0773 ||:  98%|#########8| 108/110 [01:13<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0777 ||:  99%|#########9| 109/110 [01:14<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0778 ||: 100%|##########| 110/110 [01:14<00:00,  1.76it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0778 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7215, acc: 0.6562, no_result: 0.0938, loss: 1.0040 ||:   4%|4         | 1/24 [00:00<00:08,  2.78it/s]
+BLEU: 0.6833, acc: 0.6875, no_result: 0.1094, loss: 0.9450 ||:   8%|8         | 2/24 [00:00<00:07,  2.81it/s]
+BLEU: 0.6673, acc: 0.6458, no_result: 0.1354, loss: 1.0634 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.7072, acc: 0.6641, no_result: 0.1328, loss: 0.9999 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7226, acc: 0.6875, no_result: 0.1250, loss: 0.9742 ||:  21%|##        | 5/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7163, acc: 0.6719, no_result: 0.1146, loss: 0.9713 ||:  25%|##5       | 6/24 [00:02<00:06,  2.82it/s]
+BLEU: 0.7232, acc: 0.6830, no_result: 0.1116, loss: 0.9488 ||:  29%|##9       | 7/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7198, acc: 0.6953, no_result: 0.1094, loss: 0.9647 ||:  33%|###3      | 8/24 [00:02<00:05,  2.96it/s]
+BLEU: 0.7085, acc: 0.7049, no_result: 0.1042, loss: 0.9540 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7163, acc: 0.6969, no_result: 0.1062, loss: 0.9710 ||:  42%|####1     | 10/24 [00:03<00:04,  2.82it/s]
+BLEU: 0.7166, acc: 0.6761, no_result: 0.1108, loss: 1.0220 ||:  46%|####5     | 11/24 [00:03<00:04,  2.70it/s]
+BLEU: 0.7111, acc: 0.6771, no_result: 0.1068, loss: 1.0330 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7115, acc: 0.6755, no_result: 0.1106, loss: 1.0557 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.7116, acc: 0.6585, no_result: 0.1116, loss: 1.1036 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.7023, acc: 0.6417, no_result: 0.1292, loss: 1.1576 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.7030, acc: 0.6328, no_result: 0.1309, loss: 1.1847 ||:  67%|######6   | 16/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7017, acc: 0.6287, no_result: 0.1379, loss: 1.1781 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.7002, acc: 0.6215, no_result: 0.1424, loss: 1.1925 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7036, acc: 0.6316, no_result: 0.1382, loss: 1.1728 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7066, acc: 0.6375, no_result: 0.1344, loss: 1.1608 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.7017, acc: 0.6354, no_result: 0.1295, loss: 1.1617 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7053, acc: 0.6278, no_result: 0.1278, loss: 1.1603 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.7086, acc: 0.6345, no_result: 0.1236, loss: 1.1400 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7073, acc: 0.6312, no_result: 0.1231, loss: 1.1586 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7073, acc: 0.6312, no_result: 0.1231, loss: 1.1586 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:   1%|          | 1/110 [00:00<01:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0800 ||:   2%|1         | 2/110 [00:01<01:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0676 ||:   3%|2         | 3/110 [00:02<01:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0726 ||:   4%|3         | 4/110 [00:02<01:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:   5%|4         | 5/110 [00:03<01:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:   6%|6         | 7/110 [00:04<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0716 ||:   7%|7         | 8/110 [00:05<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0719 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0733 ||:   9%|9         | 10/110 [00:07<01:35,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0742 ||:  10%|#         | 11/110 [00:08<01:29,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0737 ||:  11%|#         | 12/110 [00:09<01:24,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0757 ||:  12%|#1        | 13/110 [00:09<01:17,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  13%|#2        | 14/110 [00:10<01:18,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0768 ||:  14%|#3        | 15/110 [00:11<01:16,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  15%|#4        | 16/110 [00:12<01:12,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0747 ||:  15%|#5        | 17/110 [00:12<01:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0773 ||:  16%|#6        | 18/110 [00:13<01:07,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  17%|#7        | 19/110 [00:14<01:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  18%|#8        | 20/110 [00:15<01:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  19%|#9        | 21/110 [00:15<01:07,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:  20%|##        | 22/110 [00:16<01:06,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0744 ||:  21%|##        | 23/110 [00:17<01:03,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0761 ||:  22%|##1       | 24/110 [00:18<01:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0757 ||:  23%|##2       | 25/110 [00:18<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0762 ||:  24%|##3       | 26/110 [00:19<01:06,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0769 ||:  25%|##4       | 27/110 [00:20<01:01,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0769 ||:  25%|##5       | 28/110 [00:21<00:58,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0770 ||:  26%|##6       | 29/110 [00:21<01:02,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0773 ||:  27%|##7       | 30/110 [00:22<01:00,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0769 ||:  28%|##8       | 31/110 [00:23<00:59,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0771 ||:  29%|##9       | 32/110 [00:24<00:57,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0769 ||:  30%|###       | 33/110 [00:24<01:00,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0760 ||:  31%|###       | 34/110 [00:25<00:57,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0759 ||:  32%|###1      | 35/110 [00:26<00:54,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0775 ||:  33%|###2      | 36/110 [00:27<00:53,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0773 ||:  34%|###3      | 37/110 [00:27<00:53,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0767 ||:  35%|###4      | 38/110 [00:28<00:51,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0762 ||:  35%|###5      | 39/110 [00:29<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0767 ||:  36%|###6      | 40/110 [00:29<00:50,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0766 ||:  37%|###7      | 41/110 [00:30<00:49,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0764 ||:  38%|###8      | 42/110 [00:31<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0761 ||:  39%|###9      | 43/110 [00:32<00:49,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0760 ||:  40%|####      | 44/110 [00:32<00:48,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  41%|####      | 45/110 [00:33<00:45,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:  42%|####1     | 46/110 [00:34<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  43%|####2     | 47/110 [00:35<00:47,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0756 ||:  44%|####3     | 48/110 [00:35<00:47,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0753 ||:  45%|####4     | 49/110 [00:36<00:46,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0753 ||:  45%|####5     | 50/110 [00:37<00:42,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  46%|####6     | 51/110 [00:37<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  47%|####7     | 52/110 [00:38<00:43,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0757 ||:  48%|####8     | 53/110 [00:39<00:41,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0758 ||:  49%|####9     | 54/110 [00:40<00:41,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0773 ||:  50%|#####     | 55/110 [00:40<00:39,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0778 ||:  51%|#####     | 56/110 [00:41<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0783 ||:  52%|#####1    | 57/110 [00:42<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0787 ||:  53%|#####2    | 58/110 [00:42<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0783 ||:  54%|#####3    | 59/110 [00:43<00:35,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0784 ||:  55%|#####4    | 60/110 [00:44<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0786 ||:  55%|#####5    | 61/110 [00:44<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0790 ||:  56%|#####6    | 62/110 [00:45<00:34,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0793 ||:  57%|#####7    | 63/110 [00:46<00:35,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0792 ||:  58%|#####8    | 64/110 [00:47<00:35,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0796 ||:  59%|#####9    | 65/110 [00:48<00:33,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0796 ||:  60%|######    | 66/110 [00:48<00:31,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0801 ||:  61%|######    | 67/110 [00:49<00:31,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0799 ||:  62%|######1   | 68/110 [00:50<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0797 ||:  63%|######2   | 69/110 [00:50<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0802 ||:  64%|######3   | 70/110 [00:51<00:30,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  65%|######4   | 71/110 [00:52<00:29,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0803 ||:  65%|######5   | 72/110 [00:53<00:28,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  66%|######6   | 73/110 [00:54<00:28,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0802 ||:  67%|######7   | 74/110 [00:54<00:26,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0806 ||:  68%|######8   | 75/110 [00:55<00:25,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  69%|######9   | 76/110 [00:56<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0801 ||:  70%|#######   | 77/110 [00:56<00:24,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0799 ||:  71%|#######   | 78/110 [00:57<00:23,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0800 ||:  72%|#######1  | 79/110 [00:58<00:23,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0798 ||:  73%|#######2  | 80/110 [00:59<00:22,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0797 ||:  74%|#######3  | 81/110 [00:59<00:21,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0794 ||:  75%|#######4  | 82/110 [01:00<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0795 ||:  75%|#######5  | 83/110 [01:01<00:19,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0793 ||:  76%|#######6  | 84/110 [01:01<00:18,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0794 ||:  77%|#######7  | 85/110 [01:02<00:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0802 ||:  78%|#######8  | 86/110 [01:03<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0802 ||:  79%|#######9  | 87/110 [01:04<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0806 ||:  80%|########  | 88/110 [01:04<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  81%|########  | 89/110 [01:05<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0803 ||:  82%|########1 | 90/110 [01:06<00:14,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0804 ||:  83%|########2 | 91/110 [01:06<00:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0803 ||:  84%|########3 | 92/110 [01:07<00:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  85%|########4 | 93/110 [01:08<00:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0806 ||:  85%|########5 | 94/110 [01:09<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  86%|########6 | 95/110 [01:09<00:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0809 ||:  87%|########7 | 96/110 [01:10<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  88%|########8 | 97/110 [01:11<00:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  89%|########9 | 98/110 [01:11<00:08,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  90%|######### | 99/110 [01:12<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  91%|######### | 100/110 [01:13<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0809 ||:  92%|#########1| 101/110 [01:14<00:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0809 ||:  93%|#########2| 102/110 [01:15<00:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0807 ||:  94%|#########3| 103/110 [01:15<00:05,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0807 ||:  95%|#########4| 104/110 [01:16<00:04,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  95%|#########5| 105/110 [01:17<00:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0809 ||:  96%|#########6| 106/110 [01:17<00:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0807 ||:  97%|#########7| 107/110 [01:18<00:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  98%|#########8| 108/110 [01:19<00:01,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  99%|#########9| 109/110 [01:20<00:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||: 100%|##########| 110/110 [01:21<00:00,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||: 100%|##########| 110/110 [01:21<00:00,  1.35it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6885, acc: 0.6562, no_result: 0.0938, loss: 1.0289 ||:   4%|4         | 1/24 [00:00<00:08,  2.61it/s]
+BLEU: 0.6342, acc: 0.6875, no_result: 0.0938, loss: 0.9258 ||:   8%|8         | 2/24 [00:00<00:08,  2.68it/s]
+BLEU: 0.6235, acc: 0.6771, no_result: 0.1146, loss: 1.1001 ||:  12%|#2        | 3/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.6593, acc: 0.6719, no_result: 0.1172, loss: 1.0347 ||:  17%|#6        | 4/24 [00:01<00:07,  2.64it/s]
+BLEU: 0.6906, acc: 0.6813, no_result: 0.1187, loss: 0.9900 ||:  21%|##        | 5/24 [00:01<00:07,  2.61it/s]
+BLEU: 0.6899, acc: 0.6667, no_result: 0.1146, loss: 0.9954 ||:  25%|##5       | 6/24 [00:02<00:06,  2.72it/s]
+BLEU: 0.7003, acc: 0.6741, no_result: 0.1071, loss: 0.9831 ||:  29%|##9       | 7/24 [00:02<00:06,  2.83it/s]
+BLEU: 0.6964, acc: 0.6875, no_result: 0.1016, loss: 0.9906 ||:  33%|###3      | 8/24 [00:02<00:05,  2.87it/s]
+BLEU: 0.6935, acc: 0.6944, no_result: 0.0972, loss: 0.9799 ||:  38%|###7      | 9/24 [00:03<00:05,  2.83it/s]
+BLEU: 0.6987, acc: 0.6844, no_result: 0.0938, loss: 0.9944 ||:  42%|####1     | 10/24 [00:03<00:05,  2.80it/s]
+BLEU: 0.7001, acc: 0.6705, no_result: 0.0966, loss: 1.0387 ||:  46%|####5     | 11/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.6976, acc: 0.6745, no_result: 0.0885, loss: 1.0451 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.6965, acc: 0.6755, no_result: 0.0913, loss: 1.0687 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.6984, acc: 0.6585, no_result: 0.0982, loss: 1.1175 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.59it/s]
+BLEU: 0.6862, acc: 0.6396, no_result: 0.1187, loss: 1.1764 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.6847, acc: 0.6270, no_result: 0.1211, loss: 1.1989 ||:  67%|######6   | 16/24 [00:06<00:03,  2.39it/s]
+BLEU: 0.6845, acc: 0.6195, no_result: 0.1268, loss: 1.1931 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6825, acc: 0.6163, no_result: 0.1302, loss: 1.2093 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.6859, acc: 0.6266, no_result: 0.1250, loss: 1.1848 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.6896, acc: 0.6344, no_result: 0.1219, loss: 1.1728 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.6863, acc: 0.6324, no_result: 0.1176, loss: 1.1747 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.6910, acc: 0.6278, no_result: 0.1151, loss: 1.1742 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.6965, acc: 0.6359, no_result: 0.1128, loss: 1.1529 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.6965, acc: 0.6325, no_result: 0.1127, loss: 1.1652 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.6965, acc: 0.6325, no_result: 0.1127, loss: 1.1652 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0762 ||:   1%|          | 1/110 [00:00<01:29,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0778 ||:   2%|1         | 2/110 [00:01<01:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0777 ||:   3%|2         | 3/110 [00:02<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0752 ||:   4%|3         | 4/110 [00:02<01:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0810 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0867 ||:   5%|5         | 6/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0858 ||:   6%|6         | 7/110 [00:05<01:15,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0861 ||:   7%|7         | 8/110 [00:05<01:17,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0889 ||:   8%|8         | 9/110 [00:06<01:15,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0895 ||:   9%|9         | 10/110 [00:07<01:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0873 ||:  10%|#         | 11/110 [00:07<01:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0848 ||:  11%|#         | 12/110 [00:08<01:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0869 ||:  12%|#1        | 13/110 [00:09<01:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0852 ||:  13%|#2        | 14/110 [00:10<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0829 ||:  14%|#3        | 15/110 [00:10<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0813 ||:  15%|#4        | 16/110 [00:11<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0810 ||:  15%|#5        | 17/110 [00:12<01:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0811 ||:  16%|#6        | 18/110 [00:12<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0797 ||:  17%|#7        | 19/110 [00:13<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:  18%|#8        | 20/110 [00:14<01:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0808 ||:  19%|#9        | 21/110 [00:15<01:07,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0806 ||:  20%|##        | 22/110 [00:15<01:04,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0799 ||:  21%|##        | 23/110 [00:16<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0791 ||:  22%|##1       | 24/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0784 ||:  23%|##2       | 25/110 [00:17<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0775 ||:  24%|##3       | 26/110 [00:18<00:53,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0780 ||:  25%|##4       | 27/110 [00:18<00:55,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0784 ||:  25%|##5       | 28/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0786 ||:  26%|##6       | 29/110 [00:20<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0786 ||:  27%|##7       | 30/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0780 ||:  28%|##8       | 31/110 [00:21<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0780 ||:  29%|##9       | 32/110 [00:22<00:52,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0774 ||:  30%|###       | 33/110 [00:22<00:51,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0763 ||:  31%|###       | 34/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0765 ||:  32%|###1      | 35/110 [00:24<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0755 ||:  33%|###2      | 36/110 [00:24<00:48,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  34%|###3      | 37/110 [00:25<00:49,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0750 ||:  35%|###4      | 38/110 [00:26<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0749 ||:  35%|###5      | 39/110 [00:27<00:50,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  36%|###6      | 40/110 [00:27<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0747 ||:  37%|###7      | 41/110 [00:28<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0748 ||:  38%|###8      | 42/110 [00:29<00:46,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  39%|###9      | 43/110 [00:29<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  40%|####      | 44/110 [00:30<00:44,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0736 ||:  41%|####      | 45/110 [00:31<00:43,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0739 ||:  42%|####1     | 46/110 [00:31<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  43%|####2     | 47/110 [00:32<00:45,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0736 ||:  44%|####3     | 48/110 [00:33<00:44,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0737 ||:  45%|####4     | 49/110 [00:34<00:44,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0737 ||:  45%|####5     | 50/110 [00:34<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  46%|####6     | 51/110 [00:35<00:42,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0737 ||:  47%|####7     | 52/110 [00:36<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0732 ||:  48%|####8     | 53/110 [00:36<00:39,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0732 ||:  49%|####9     | 54/110 [00:37<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0729 ||:  50%|#####     | 55/110 [00:38<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  51%|#####     | 56/110 [00:38<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0732 ||:  52%|#####1    | 57/110 [00:39<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:  53%|#####2    | 58/110 [00:40<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0736 ||:  55%|#####4    | 60/110 [00:41<00:35,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  55%|#####5    | 61/110 [00:42<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  56%|#####6    | 62/110 [00:42<00:33,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0732 ||:  57%|#####7    | 63/110 [00:43<00:33,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0726 ||:  59%|#####9    | 65/110 [00:45<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0725 ||:  60%|######    | 66/110 [00:45<00:31,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0729 ||:  61%|######    | 67/110 [00:46<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0726 ||:  62%|######1   | 68/110 [00:47<00:30,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  63%|######2   | 69/110 [00:48<00:29,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0732 ||:  64%|######3   | 70/110 [00:48<00:28,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  65%|######4   | 71/110 [00:49<00:27,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0730 ||:  65%|######5   | 72/110 [00:50<00:27,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0730 ||:  66%|######6   | 73/110 [00:50<00:26,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  67%|######7   | 74/110 [00:51<00:26,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0733 ||:  68%|######8   | 75/110 [00:52<00:25,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0730 ||:  69%|######9   | 76/110 [00:53<00:24,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  70%|#######   | 77/110 [00:53<00:23,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  71%|#######   | 78/110 [00:54<00:23,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0724 ||:  72%|#######1  | 79/110 [00:55<00:22,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0723 ||:  73%|#######2  | 80/110 [00:55<00:21,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0720 ||:  74%|#######3  | 81/110 [00:56<00:19,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0720 ||:  75%|#######4  | 82/110 [00:57<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0725 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0729 ||:  76%|#######6  | 84/110 [00:58<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  77%|#######7  | 85/110 [00:59<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0733 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  79%|#######9  | 87/110 [01:00<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0734 ||:  80%|########  | 88/110 [01:01<00:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0734 ||:  81%|########  | 89/110 [01:02<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0734 ||:  82%|########1 | 90/110 [01:02<00:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0728 ||:  83%|########2 | 91/110 [01:03<00:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0727 ||:  84%|########3 | 92/110 [01:04<00:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0731 ||:  85%|########4 | 93/110 [01:05<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  85%|########5 | 94/110 [01:05<00:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:  86%|########6 | 95/110 [01:06<00:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  87%|########7 | 96/110 [01:07<00:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0737 ||:  88%|########8 | 97/110 [01:08<00:09,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0735 ||:  89%|########9 | 98/110 [01:08<00:09,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||:  90%|######### | 99/110 [01:09<00:08,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0743 ||:  91%|######### | 100/110 [01:11<00:10,  1.00s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.0745 ||:  92%|#########1| 101/110 [01:11<00:08,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0742 ||:  93%|#########2| 102/110 [01:12<00:06,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0742 ||:  94%|#########3| 103/110 [01:13<00:05,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:  95%|#########4| 104/110 [01:14<00:04,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0739 ||:  95%|#########5| 105/110 [01:14<00:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0741 ||:  96%|#########6| 106/110 [01:15<00:03,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0741 ||:  97%|#########7| 107/110 [01:16<00:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0742 ||:  98%|#########8| 108/110 [01:16<00:01,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0742 ||:  99%|#########9| 109/110 [01:17<00:00,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||: 100%|##########| 110/110 [01:18<00:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0738 ||: 100%|##########| 110/110 [01:18<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6941, acc: 0.6562, no_result: 0.0625, loss: 1.0573 ||:   4%|4         | 1/24 [00:00<00:09,  2.38it/s]
+BLEU: 0.6468, acc: 0.6562, no_result: 0.0781, loss: 0.9898 ||:   8%|8         | 2/24 [00:00<00:09,  2.42it/s]
+BLEU: 0.6226, acc: 0.6354, no_result: 0.1146, loss: 1.1246 ||:  12%|#2        | 3/24 [00:01<00:08,  2.43it/s]
+BLEU: 0.6646, acc: 0.6250, no_result: 0.1094, loss: 1.0649 ||:  17%|#6        | 4/24 [00:01<00:08,  2.38it/s]
+BLEU: 0.7007, acc: 0.6625, no_result: 0.1000, loss: 1.0075 ||:  21%|##        | 5/24 [00:02<00:08,  2.37it/s]
+BLEU: 0.7043, acc: 0.6406, no_result: 0.0938, loss: 1.0003 ||:  25%|##5       | 6/24 [00:02<00:07,  2.45it/s]
+BLEU: 0.7149, acc: 0.6518, no_result: 0.0848, loss: 0.9865 ||:  29%|##9       | 7/24 [00:02<00:06,  2.55it/s]
+BLEU: 0.7116, acc: 0.6641, no_result: 0.0820, loss: 0.9994 ||:  33%|###3      | 8/24 [00:03<00:06,  2.57it/s]
+BLEU: 0.6999, acc: 0.6632, no_result: 0.0938, loss: 0.9904 ||:  38%|###7      | 9/24 [00:03<00:06,  2.49it/s]
+BLEU: 0.7087, acc: 0.6531, no_result: 0.0969, loss: 1.0065 ||:  42%|####1     | 10/24 [00:04<00:05,  2.43it/s]
+BLEU: 0.7077, acc: 0.6449, no_result: 0.0994, loss: 1.0512 ||:  46%|####5     | 11/24 [00:04<00:05,  2.32it/s]
+BLEU: 0.7038, acc: 0.6484, no_result: 0.0938, loss: 1.0681 ||:  50%|#####     | 12/24 [00:04<00:05,  2.39it/s]
+BLEU: 0.7030, acc: 0.6490, no_result: 0.0986, loss: 1.0872 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.32it/s]
+BLEU: 0.7051, acc: 0.6384, no_result: 0.0982, loss: 1.1330 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.22it/s]
+BLEU: 0.6928, acc: 0.6208, no_result: 0.1167, loss: 1.1884 ||:  62%|######2   | 15/24 [00:06<00:04,  2.08it/s]
+BLEU: 0.6923, acc: 0.6172, no_result: 0.1191, loss: 1.2134 ||:  67%|######6   | 16/24 [00:06<00:03,  2.05it/s]
+BLEU: 0.6924, acc: 0.6140, no_result: 0.1232, loss: 1.2085 ||:  71%|#######   | 17/24 [00:07<00:03,  2.11it/s]
+BLEU: 0.6911, acc: 0.6128, no_result: 0.1267, loss: 1.2276 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.09it/s]
+BLEU: 0.6941, acc: 0.6217, no_result: 0.1234, loss: 1.1998 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.18it/s]
+BLEU: 0.6962, acc: 0.6266, no_result: 0.1203, loss: 1.1873 ||:  83%|########3 | 20/24 [00:08<00:01,  2.31it/s]
+BLEU: 0.6954, acc: 0.6205, no_result: 0.1190, loss: 1.1883 ||:  88%|########7 | 21/24 [00:09<00:01,  2.29it/s]
+BLEU: 0.7001, acc: 0.6193, no_result: 0.1151, loss: 1.1857 ||:  92%|#########1| 22/24 [00:09<00:00,  2.26it/s]
+BLEU: 0.7041, acc: 0.6264, no_result: 0.1114, loss: 1.1643 ||:  96%|#########5| 23/24 [00:09<00:00,  2.34it/s]
+BLEU: 0.7039, acc: 0.6234, no_result: 0.1114, loss: 1.1805 ||: 100%|##########| 24/24 [00:10<00:00,  2.87it/s]
+BLEU: 0.7039, acc: 0.6234, no_result: 0.1114, loss: 1.1805 ||: 100%|##########| 24/24 [00:10<00:00,  2.37it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0531 ||:   1%|          | 1/110 [00:00<01:33,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0502 ||:   2%|1         | 2/110 [00:01<01:22,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0595 ||:   3%|2         | 3/110 [00:02<01:22,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0542 ||:   4%|3         | 4/110 [00:03<01:30,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0536 ||:   5%|4         | 5/110 [00:04<01:24,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0519 ||:   5%|5         | 6/110 [00:04<01:19,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0523 ||:   6%|6         | 7/110 [00:05<01:16,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0551 ||:   7%|7         | 8/110 [00:06<01:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0577 ||:   8%|8         | 9/110 [00:06<01:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0572 ||:   9%|9         | 10/110 [00:07<01:12,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0583 ||:  10%|#         | 11/110 [00:08<01:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0574 ||:  11%|#         | 12/110 [00:08<01:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0600 ||:  12%|#1        | 13/110 [00:09<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0599 ||:  13%|#2        | 14/110 [00:10<01:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0594 ||:  14%|#3        | 15/110 [00:11<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0587 ||:  15%|#4        | 16/110 [00:11<01:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0599 ||:  15%|#5        | 17/110 [00:12<01:10,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0611 ||:  16%|#6        | 18/110 [00:13<01:09,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0612 ||:  17%|#7        | 19/110 [00:14<01:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0621 ||:  18%|#8        | 20/110 [00:14<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0619 ||:  19%|#9        | 21/110 [00:15<01:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0630 ||:  20%|##        | 22/110 [00:16<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0623 ||:  21%|##        | 23/110 [00:16<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0619 ||:  22%|##1       | 24/110 [00:17<00:59,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0617 ||:  23%|##2       | 25/110 [00:18<00:58,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0616 ||:  24%|##3       | 26/110 [00:19<01:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0609 ||:  25%|##4       | 27/110 [00:19<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0604 ||:  25%|##5       | 28/110 [00:20<01:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0609 ||:  26%|##6       | 29/110 [00:21<00:59,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0615 ||:  27%|##7       | 30/110 [00:21<00:59,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0618 ||:  28%|##8       | 31/110 [00:22<01:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0617 ||:  29%|##9       | 32/110 [00:23<00:59,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0618 ||:  30%|###       | 33/110 [00:24<00:57,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  31%|###       | 34/110 [00:24<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  32%|###1      | 35/110 [00:25<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0629 ||:  33%|###2      | 36/110 [00:26<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  34%|###3      | 37/110 [00:27<00:51,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0626 ||:  35%|###4      | 38/110 [00:27<00:53,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0625 ||:  35%|###5      | 39/110 [00:28<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0626 ||:  36%|###6      | 40/110 [00:29<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0629 ||:  37%|###7      | 41/110 [00:29<00:48,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0638 ||:  38%|###8      | 42/110 [00:30<00:47,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0634 ||:  39%|###9      | 43/110 [00:31<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0642 ||:  40%|####      | 44/110 [00:31<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0652 ||:  41%|####      | 45/110 [00:32<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0647 ||:  42%|####1     | 46/110 [00:33<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0653 ||:  43%|####2     | 47/110 [00:33<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0661 ||:  44%|####3     | 48/110 [00:34<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0657 ||:  45%|####4     | 49/110 [00:35<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0658 ||:  45%|####5     | 50/110 [00:35<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0660 ||:  46%|####6     | 51/110 [00:36<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0659 ||:  47%|####7     | 52/110 [00:37<00:40,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0666 ||:  48%|####8     | 53/110 [00:38<00:40,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0665 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0668 ||:  50%|#####     | 55/110 [00:39<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0671 ||:  51%|#####     | 56/110 [00:40<00:39,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0670 ||:  52%|#####1    | 57/110 [00:41<00:38,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0671 ||:  53%|#####2    | 58/110 [00:41<00:38,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0670 ||:  54%|#####3    | 59/110 [00:42<00:38,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0676 ||:  55%|#####4    | 60/110 [00:43<00:38,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0673 ||:  55%|#####5    | 61/110 [00:44<00:39,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0670 ||:  56%|#####6    | 62/110 [00:44<00:36,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0669 ||:  57%|#####7    | 63/110 [00:45<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0668 ||:  58%|#####8    | 64/110 [00:46<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0666 ||:  59%|#####9    | 65/110 [00:47<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0662 ||:  60%|######    | 66/110 [00:47<00:30,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0658 ||:  61%|######    | 67/110 [00:48<00:30,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0656 ||:  62%|######1   | 68/110 [00:49<00:30,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0658 ||:  63%|######2   | 69/110 [00:49<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0660 ||:  64%|######3   | 70/110 [00:50<00:27,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0668 ||:  65%|######4   | 71/110 [00:51<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0667 ||:  65%|######5   | 72/110 [00:51<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0669 ||:  66%|######6   | 73/110 [00:52<00:26,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0666 ||:  67%|######7   | 74/110 [00:53<00:28,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0669 ||:  68%|######8   | 75/110 [00:54<00:26,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0671 ||:  69%|######9   | 76/110 [00:55<00:25,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0674 ||:  70%|#######   | 77/110 [00:55<00:24,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0675 ||:  71%|#######   | 78/110 [00:56<00:24,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0674 ||:  72%|#######1  | 79/110 [00:57<00:23,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0675 ||:  73%|#######2  | 80/110 [00:58<00:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0677 ||:  74%|#######3  | 81/110 [00:58<00:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0678 ||:  75%|#######4  | 82/110 [00:59<00:20,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0679 ||:  75%|#######5  | 83/110 [01:00<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0679 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0680 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0678 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0680 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0682 ||:  80%|########  | 88/110 [01:03<00:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0680 ||:  81%|########  | 89/110 [01:04<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0682 ||:  82%|########1 | 90/110 [01:06<00:20,  1.01s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.0687 ||:  83%|########2 | 91/110 [01:06<00:17,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0688 ||:  84%|########3 | 92/110 [01:07<00:15,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0686 ||:  85%|########4 | 93/110 [01:08<00:13,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0684 ||:  85%|########5 | 94/110 [01:08<00:12,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0686 ||:  86%|########6 | 95/110 [01:09<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0684 ||:  87%|########7 | 96/110 [01:10<00:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0688 ||:  88%|########8 | 97/110 [01:11<00:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0686 ||:  89%|########9 | 98/110 [01:11<00:09,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0686 ||:  90%|######### | 99/110 [01:12<00:08,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0686 ||:  91%|######### | 100/110 [01:13<00:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0685 ||:  92%|#########1| 101/110 [01:13<00:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0683 ||:  93%|#########2| 102/110 [01:14<00:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0683 ||:  94%|#########3| 103/110 [01:15<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0681 ||:  95%|#########4| 104/110 [01:16<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0682 ||:  95%|#########5| 105/110 [01:16<00:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0684 ||:  96%|#########6| 106/110 [01:17<00:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0683 ||:  97%|#########7| 107/110 [01:18<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0683 ||:  98%|#########8| 108/110 [01:18<00:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0681 ||:  99%|#########9| 109/110 [01:19<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0679 ||: 100%|##########| 110/110 [01:19<00:00,  1.79it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0679 ||: 100%|##########| 110/110 [01:19<00:00,  1.38it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6941, acc: 0.6875, no_result: 0.0625, loss: 1.0852 ||:   4%|4         | 1/24 [00:00<00:08,  2.76it/s]
+BLEU: 0.6611, acc: 0.6875, no_result: 0.0781, loss: 1.0328 ||:   8%|8         | 2/24 [00:00<00:07,  2.82it/s]
+BLEU: 0.6555, acc: 0.6458, no_result: 0.1146, loss: 1.1261 ||:  12%|#2        | 3/24 [00:01<00:07,  2.86it/s]
+BLEU: 0.6789, acc: 0.6484, no_result: 0.1250, loss: 1.0747 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7121, acc: 0.6687, no_result: 0.1250, loss: 1.0333 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7148, acc: 0.6510, no_result: 0.1198, loss: 1.0181 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.7205, acc: 0.6652, no_result: 0.1116, loss: 1.0036 ||:  29%|##9       | 7/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7116, acc: 0.6797, no_result: 0.1055, loss: 1.0204 ||:  33%|###3      | 8/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.6980, acc: 0.6875, no_result: 0.1042, loss: 1.0078 ||:  38%|###7      | 9/24 [00:03<00:05,  2.92it/s]
+BLEU: 0.7098, acc: 0.6750, no_result: 0.1031, loss: 1.0253 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.7119, acc: 0.6648, no_result: 0.1023, loss: 1.0639 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7079, acc: 0.6667, no_result: 0.1016, loss: 1.0803 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7021, acc: 0.6683, no_result: 0.1034, loss: 1.0974 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.75it/s]
+BLEU: 0.7024, acc: 0.6540, no_result: 0.1004, loss: 1.1379 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.65it/s]
+BLEU: 0.6936, acc: 0.6375, no_result: 0.1167, loss: 1.1914 ||:  62%|######2   | 15/24 [00:05<00:03,  2.47it/s]
+BLEU: 0.6945, acc: 0.6367, no_result: 0.1211, loss: 1.2180 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.6924, acc: 0.6287, no_result: 0.1268, loss: 1.2104 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.6913, acc: 0.6285, no_result: 0.1285, loss: 1.2264 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6951, acc: 0.6365, no_result: 0.1250, loss: 1.2021 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.6973, acc: 0.6406, no_result: 0.1234, loss: 1.1929 ||:  83%|########3 | 20/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.6941, acc: 0.6369, no_result: 0.1190, loss: 1.1977 ||:  88%|########7 | 21/24 [00:07<00:01,  2.62it/s]
+BLEU: 0.6987, acc: 0.6307, no_result: 0.1179, loss: 1.1929 ||:  92%|#########1| 22/24 [00:08<00:00,  2.60it/s]
+BLEU: 0.7015, acc: 0.6372, no_result: 0.1155, loss: 1.1711 ||:  96%|#########5| 23/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.7000, acc: 0.6338, no_result: 0.1153, loss: 1.1899 ||: 100%|##########| 24/24 [00:08<00:00,  3.34it/s]
+BLEU: 0.7000, acc: 0.6338, no_result: 0.1153, loss: 1.1899 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0556 ||:   1%|          | 1/110 [00:00<01:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0532 ||:   2%|1         | 2/110 [00:01<01:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0527 ||:   3%|2         | 3/110 [00:02<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0535 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0520 ||:   5%|4         | 5/110 [00:03<01:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0561 ||:   5%|5         | 6/110 [00:04<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0556 ||:   6%|6         | 7/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0577 ||:   7%|7         | 8/110 [00:05<01:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0591 ||:   8%|8         | 9/110 [00:05<01:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0605 ||:   9%|9         | 10/110 [00:06<01:04,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0647 ||:  10%|#         | 11/110 [00:07<01:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0616 ||:  11%|#         | 12/110 [00:07<01:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0603 ||:  12%|#1        | 13/110 [00:08<01:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0615 ||:  13%|#2        | 14/110 [00:09<01:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0604 ||:  14%|#3        | 15/110 [00:09<01:00,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0619 ||:  15%|#4        | 16/110 [00:10<00:59,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0618 ||:  15%|#5        | 17/110 [00:11<00:57,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0618 ||:  16%|#6        | 18/110 [00:11<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0606 ||:  17%|#7        | 19/110 [00:12<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0625 ||:  18%|#8        | 20/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0621 ||:  19%|#9        | 21/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0619 ||:  20%|##        | 22/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0620 ||:  21%|##        | 23/110 [00:15<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0622 ||:  22%|##1       | 24/110 [00:16<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0639 ||:  23%|##2       | 25/110 [00:16<01:01,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  24%|##3       | 26/110 [00:17<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0633 ||:  25%|##4       | 27/110 [00:17<00:55,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  25%|##5       | 28/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0643 ||:  26%|##6       | 29/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0656 ||:  27%|##7       | 30/110 [00:20<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0658 ||:  28%|##8       | 31/110 [00:20<00:52,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0656 ||:  29%|##9       | 32/110 [00:21<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0656 ||:  30%|###       | 33/110 [00:22<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0649 ||:  31%|###       | 34/110 [00:22<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0646 ||:  32%|###1      | 35/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0646 ||:  33%|###2      | 36/110 [00:24<00:49,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0648 ||:  34%|###3      | 37/110 [00:24<00:50,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0644 ||:  35%|###4      | 38/110 [00:25<00:47,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0640 ||:  35%|###5      | 39/110 [00:26<00:48,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  36%|###6      | 40/110 [00:26<00:46,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0628 ||:  37%|###7      | 41/110 [00:27<00:48,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0619 ||:  38%|###8      | 42/110 [00:28<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0629 ||:  39%|###9      | 43/110 [00:28<00:45,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  40%|####      | 44/110 [00:29<00:44,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0627 ||:  41%|####      | 45/110 [00:30<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0620 ||:  42%|####1     | 46/110 [00:30<00:40,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0624 ||:  43%|####2     | 47/110 [00:31<00:42,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0622 ||:  44%|####3     | 48/110 [00:32<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0618 ||:  45%|####4     | 49/110 [00:32<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0614 ||:  45%|####5     | 50/110 [00:33<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0624 ||:  46%|####6     | 51/110 [00:34<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0629 ||:  47%|####7     | 52/110 [00:34<00:37,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0631 ||:  48%|####8     | 53/110 [00:35<00:36,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0631 ||:  49%|####9     | 54/110 [00:36<00:36,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  50%|#####     | 55/110 [00:36<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0633 ||:  51%|#####     | 56/110 [00:37<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0633 ||:  53%|#####2    | 58/110 [00:38<00:36,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0633 ||:  54%|#####3    | 59/110 [00:39<00:35,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0629 ||:  55%|#####4    | 60/110 [00:40<00:35,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  55%|#####5    | 61/110 [00:40<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0631 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0630 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0631 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  59%|#####9    | 65/110 [00:43<00:30,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0633 ||:  60%|######    | 66/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0633 ||:  61%|######    | 67/110 [00:44<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  62%|######1   | 68/110 [00:45<00:27,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0638 ||:  63%|######2   | 69/110 [00:46<00:26,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0639 ||:  64%|######3   | 70/110 [00:46<00:25,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0636 ||:  65%|######4   | 71/110 [00:47<00:24,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0638 ||:  65%|######5   | 72/110 [00:48<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  66%|######6   | 73/110 [00:48<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  67%|######7   | 74/110 [00:49<00:25,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0629 ||:  68%|######8   | 75/110 [00:50<00:25,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0627 ||:  69%|######9   | 76/110 [00:51<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0628 ||:  70%|#######   | 77/110 [00:51<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  71%|#######   | 78/110 [00:52<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0634 ||:  73%|#######2  | 80/110 [00:54<00:26,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  74%|#######3  | 81/110 [00:55<00:24,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0632 ||:  75%|#######4  | 82/110 [00:55<00:22,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0634 ||:  75%|#######5  | 83/110 [00:56<00:20,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0635 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  77%|#######7  | 85/110 [00:57<00:18,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0639 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  80%|########  | 88/110 [00:59<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  81%|########  | 89/110 [01:00<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0637 ||:  82%|########1 | 90/110 [01:01<00:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0640 ||:  83%|########2 | 91/110 [01:01<00:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0643 ||:  84%|########3 | 92/110 [01:02<00:11,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0643 ||:  85%|########4 | 93/110 [01:03<00:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0644 ||:  85%|########5 | 94/110 [01:03<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0642 ||:  86%|########6 | 95/110 [01:04<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0643 ||:  87%|########7 | 96/110 [01:05<00:08,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0645 ||:  88%|########8 | 97/110 [01:05<00:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0648 ||:  89%|########9 | 98/110 [01:06<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0649 ||:  90%|######### | 99/110 [01:07<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0653 ||:  91%|######### | 100/110 [01:07<00:06,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0655 ||:  92%|#########1| 101/110 [01:08<00:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0660 ||:  93%|#########2| 102/110 [01:09<00:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0661 ||:  94%|#########3| 103/110 [01:09<00:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0660 ||:  95%|#########4| 104/110 [01:10<00:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0661 ||:  95%|#########5| 105/110 [01:10<00:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0661 ||:  96%|#########6| 106/110 [01:11<00:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0662 ||:  97%|#########7| 107/110 [01:12<00:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0664 ||:  98%|#########8| 108/110 [01:13<00:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0664 ||:  99%|#########9| 109/110 [01:13<00:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0662 ||: 100%|##########| 110/110 [01:14<00:00,  1.74it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0662 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7077, acc: 0.6562, no_result: 0.0312, loss: 0.9824 ||:   4%|4         | 1/24 [00:00<00:08,  2.77it/s]
+BLEU: 0.6252, acc: 0.6719, no_result: 0.0625, loss: 0.9912 ||:   8%|8         | 2/24 [00:00<00:07,  2.84it/s]
+BLEU: 0.6320, acc: 0.6562, no_result: 0.1042, loss: 1.1128 ||:  12%|#2        | 3/24 [00:01<00:07,  2.87it/s]
+BLEU: 0.6704, acc: 0.6641, no_result: 0.1094, loss: 1.0702 ||:  17%|#6        | 4/24 [00:01<00:07,  2.76it/s]
+BLEU: 0.7019, acc: 0.6875, no_result: 0.1125, loss: 1.0312 ||:  21%|##        | 5/24 [00:01<00:06,  2.72it/s]
+BLEU: 0.7024, acc: 0.6562, no_result: 0.1094, loss: 1.0356 ||:  25%|##5       | 6/24 [00:02<00:06,  2.85it/s]
+BLEU: 0.7129, acc: 0.6652, no_result: 0.1027, loss: 1.0157 ||:  29%|##9       | 7/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7111, acc: 0.6680, no_result: 0.1016, loss: 1.0318 ||:  33%|###3      | 8/24 [00:02<00:05,  2.99it/s]
+BLEU: 0.7036, acc: 0.6771, no_result: 0.0972, loss: 1.0232 ||:  38%|###7      | 9/24 [00:03<00:05,  2.94it/s]
+BLEU: 0.7140, acc: 0.6656, no_result: 0.0938, loss: 1.0345 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7130, acc: 0.6449, no_result: 0.1051, loss: 1.0779 ||:  46%|####5     | 11/24 [00:03<00:04,  2.72it/s]
+BLEU: 0.7078, acc: 0.6432, no_result: 0.1016, loss: 1.0916 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7061, acc: 0.6442, no_result: 0.1010, loss: 1.1052 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.7062, acc: 0.6295, no_result: 0.0982, loss: 1.1487 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.6927, acc: 0.6083, no_result: 0.1208, loss: 1.2113 ||:  62%|######2   | 15/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.6915, acc: 0.6055, no_result: 0.1250, loss: 1.2358 ||:  67%|######6   | 16/24 [00:05<00:03,  2.37it/s]
+BLEU: 0.6886, acc: 0.5993, no_result: 0.1324, loss: 1.2285 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6875, acc: 0.5938, no_result: 0.1372, loss: 1.2425 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.6903, acc: 0.6020, no_result: 0.1332, loss: 1.2142 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.6929, acc: 0.6094, no_result: 0.1297, loss: 1.2032 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6902, acc: 0.6057, no_result: 0.1280, loss: 1.2104 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.6901, acc: 0.5994, no_result: 0.1236, loss: 1.2066 ||:  92%|#########1| 22/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.6940, acc: 0.6087, no_result: 0.1209, loss: 1.1864 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.6926, acc: 0.6065, no_result: 0.1205, loss: 1.2021 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.6926, acc: 0.6065, no_result: 0.1205, loss: 1.2021 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0760 ||:   1%|          | 1/110 [00:00<01:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0826 ||:   2%|1         | 2/110 [00:01<01:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0805 ||:   3%|2         | 3/110 [00:02<01:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0759 ||:   4%|3         | 4/110 [00:02<01:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0799 ||:   5%|4         | 5/110 [00:03<01:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0823 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0782 ||:   6%|6         | 7/110 [00:04<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0754 ||:   7%|7         | 8/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0729 ||:   8%|8         | 9/110 [00:06<01:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0703 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0695 ||:  10%|#         | 11/110 [00:07<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0669 ||:  11%|#         | 12/110 [00:08<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0639 ||:  12%|#1        | 13/110 [00:08<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0639 ||:  13%|#2        | 14/110 [00:09<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0620 ||:  14%|#3        | 15/110 [00:10<01:09,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0613 ||:  15%|#4        | 16/110 [00:10<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0617 ||:  15%|#5        | 17/110 [00:11<01:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0620 ||:  16%|#6        | 18/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0609 ||:  17%|#7        | 19/110 [00:13<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0606 ||:  18%|#8        | 20/110 [00:13<01:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0628 ||:  19%|#9        | 21/110 [00:14<00:59,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0617 ||:  20%|##        | 22/110 [00:14<00:55,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0616 ||:  21%|##        | 23/110 [00:15<00:55,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0612 ||:  22%|##1       | 24/110 [00:16<00:54,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0601 ||:  23%|##2       | 25/110 [00:16<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0601 ||:  24%|##3       | 26/110 [00:17<00:53,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0602 ||:  25%|##4       | 27/110 [00:18<00:52,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0594 ||:  25%|##5       | 28/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0586 ||:  26%|##6       | 29/110 [00:19<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0581 ||:  27%|##7       | 30/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0580 ||:  28%|##8       | 31/110 [00:20<00:49,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0594 ||:  29%|##9       | 32/110 [00:21<00:49,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0585 ||:  30%|###       | 33/110 [00:21<00:48,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0583 ||:  31%|###       | 34/110 [00:22<00:48,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0586 ||:  32%|###1      | 35/110 [00:23<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0588 ||:  33%|###2      | 36/110 [00:24<00:50,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0586 ||:  34%|###3      | 37/110 [00:24<00:49,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0582 ||:  35%|###4      | 38/110 [00:25<00:47,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0576 ||:  35%|###5      | 39/110 [00:26<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0577 ||:  36%|###6      | 40/110 [00:26<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0581 ||:  37%|###7      | 41/110 [00:27<00:44,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0580 ||:  38%|###8      | 42/110 [00:28<00:46,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0577 ||:  39%|###9      | 43/110 [00:28<00:46,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  40%|####      | 44/110 [00:29<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0586 ||:  41%|####      | 45/110 [00:30<00:44,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  42%|####1     | 46/110 [00:30<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  43%|####2     | 47/110 [00:31<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  44%|####3     | 48/110 [00:32<00:43,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0576 ||:  45%|####4     | 49/110 [00:32<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0586 ||:  45%|####5     | 50/110 [00:33<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0582 ||:  46%|####6     | 51/110 [00:34<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0591 ||:  47%|####7     | 52/110 [00:35<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0591 ||:  48%|####8     | 53/110 [00:35<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0584 ||:  49%|####9     | 54/110 [00:36<00:38,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0581 ||:  50%|#####     | 55/110 [00:37<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0581 ||:  51%|#####     | 56/110 [00:37<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0583 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0587 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0587 ||:  54%|#####3    | 59/110 [00:39<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0590 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0585 ||:  55%|#####5    | 61/110 [00:41<00:34,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0584 ||:  56%|#####6    | 62/110 [00:41<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0583 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0582 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0582 ||:  59%|#####9    | 65/110 [00:43<00:30,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0585 ||:  60%|######    | 66/110 [00:44<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0583 ||:  61%|######    | 67/110 [00:45<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0580 ||:  62%|######1   | 68/110 [00:45<00:28,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  63%|######2   | 69/110 [00:46<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  64%|######3   | 70/110 [00:48<00:39,  1.01it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  65%|######4   | 71/110 [00:48<00:34,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0579 ||:  65%|######5   | 72/110 [00:49<00:31,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0577 ||:  66%|######6   | 73/110 [00:50<00:29,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0575 ||:  67%|######7   | 74/110 [00:50<00:26,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0576 ||:  68%|######8   | 75/110 [00:51<00:26,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0573 ||:  69%|######9   | 76/110 [00:52<00:25,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0571 ||:  70%|#######   | 77/110 [00:53<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0569 ||:  71%|#######   | 78/110 [00:53<00:23,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0568 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0569 ||:  73%|#######2  | 80/110 [00:55<00:19,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0570 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0571 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0573 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0569 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0569 ||:  77%|#######7  | 85/110 [00:58<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0569 ||:  78%|#######8  | 86/110 [00:59<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0570 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0566 ||:  80%|########  | 88/110 [01:00<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0566 ||:  81%|########  | 89/110 [01:01<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0568 ||:  82%|########1 | 90/110 [01:01<00:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0569 ||:  83%|########2 | 91/110 [01:02<00:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0567 ||:  84%|########3 | 92/110 [01:02<00:11,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0565 ||:  85%|########4 | 93/110 [01:03<00:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0564 ||:  85%|########5 | 94/110 [01:04<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0563 ||:  86%|########6 | 95/110 [01:05<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0563 ||:  87%|########7 | 96/110 [01:05<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0560 ||:  88%|########8 | 97/110 [01:06<00:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0559 ||:  89%|########9 | 98/110 [01:06<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0559 ||:  90%|######### | 99/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0557 ||:  91%|######### | 100/110 [01:08<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0556 ||:  92%|#########1| 101/110 [01:09<00:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0557 ||:  93%|#########2| 102/110 [01:09<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0558 ||:  94%|#########3| 103/110 [01:10<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0560 ||:  95%|#########4| 104/110 [01:11<00:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0561 ||:  95%|#########5| 105/110 [01:11<00:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0562 ||:  96%|#########6| 106/110 [01:12<00:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0563 ||:  97%|#########7| 107/110 [01:13<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0562 ||:  98%|#########8| 108/110 [01:13<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0562 ||:  99%|#########9| 109/110 [01:14<00:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0561 ||: 100%|##########| 110/110 [01:14<00:00,  1.76it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0561 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7126, acc: 0.7188, no_result: 0.0625, loss: 1.0269 ||:   4%|4         | 1/24 [00:00<00:08,  2.65it/s]
+BLEU: 0.6793, acc: 0.7344, no_result: 0.0781, loss: 0.9893 ||:   8%|8         | 2/24 [00:00<00:08,  2.70it/s]
+BLEU: 0.6660, acc: 0.6979, no_result: 0.1250, loss: 1.1171 ||:  12%|#2        | 3/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.6917, acc: 0.7031, no_result: 0.1250, loss: 1.0472 ||:  17%|#6        | 4/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7205, acc: 0.7188, no_result: 0.1250, loss: 1.0124 ||:  21%|##        | 5/24 [00:01<00:07,  2.62it/s]
+BLEU: 0.7181, acc: 0.6823, no_result: 0.1146, loss: 1.0084 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7282, acc: 0.6920, no_result: 0.1071, loss: 0.9921 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7270, acc: 0.6953, no_result: 0.1094, loss: 1.0170 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7167, acc: 0.6944, no_result: 0.1076, loss: 1.0091 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7290, acc: 0.6813, no_result: 0.1000, loss: 1.0256 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7282, acc: 0.6619, no_result: 0.1108, loss: 1.0720 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7227, acc: 0.6667, no_result: 0.1068, loss: 1.0856 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7212, acc: 0.6611, no_result: 0.1130, loss: 1.1041 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7206, acc: 0.6496, no_result: 0.1138, loss: 1.1515 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.60it/s]
+BLEU: 0.7106, acc: 0.6312, no_result: 0.1375, loss: 1.2076 ||:  62%|######2   | 15/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7102, acc: 0.6270, no_result: 0.1387, loss: 1.2289 ||:  67%|######6   | 16/24 [00:06<00:03,  2.38it/s]
+BLEU: 0.7073, acc: 0.6195, no_result: 0.1415, loss: 1.2207 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7026, acc: 0.6163, no_result: 0.1441, loss: 1.2323 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7056, acc: 0.6234, no_result: 0.1398, loss: 1.2103 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7082, acc: 0.6281, no_result: 0.1359, loss: 1.1991 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7048, acc: 0.6235, no_result: 0.1324, loss: 1.2063 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7093, acc: 0.6179, no_result: 0.1293, loss: 1.2024 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7125, acc: 0.6277, no_result: 0.1236, loss: 1.1810 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.7128, acc: 0.6247, no_result: 0.1231, loss: 1.1966 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.7128, acc: 0.6247, no_result: 0.1231, loss: 1.1966 ||: 100%|##########| 24/24 [00:08<00:00,  2.74it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0740 ||:   1%|          | 1/110 [00:00<01:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0686 ||:   2%|1         | 2/110 [00:01<01:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0536 ||:   3%|2         | 3/110 [00:01<01:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||:   4%|3         | 4/110 [00:02<01:04,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0518 ||:   5%|4         | 5/110 [00:03<01:04,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0531 ||:   5%|5         | 6/110 [00:03<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0533 ||:   6%|6         | 7/110 [00:04<01:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0514 ||:   7%|7         | 8/110 [00:05<01:03,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0502 ||:   8%|8         | 9/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0507 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0497 ||:  10%|#         | 11/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0489 ||:  11%|#         | 12/110 [00:07<01:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0493 ||:  12%|#1        | 13/110 [00:08<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||:  13%|#2        | 14/110 [00:09<01:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  14%|#3        | 15/110 [00:09<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  15%|#4        | 16/110 [00:10<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0485 ||:  15%|#5        | 17/110 [00:11<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  16%|#6        | 18/110 [00:11<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  17%|#7        | 19/110 [00:12<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  18%|#8        | 20/110 [00:13<01:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  19%|#9        | 21/110 [00:14<01:04,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  20%|##        | 22/110 [00:14<01:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  21%|##        | 23/110 [00:15<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  22%|##1       | 24/110 [00:16<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0489 ||:  23%|##2       | 25/110 [00:16<00:58,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0490 ||:  24%|##3       | 26/110 [00:17<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0492 ||:  25%|##4       | 27/110 [00:18<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  25%|##5       | 28/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  26%|##6       | 29/110 [00:19<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0490 ||:  27%|##7       | 30/110 [00:20<00:56,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  28%|##8       | 31/110 [00:21<00:52,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  29%|##9       | 32/110 [00:21<00:49,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  30%|###       | 33/110 [00:22<00:48,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0493 ||:  31%|###       | 34/110 [00:22<00:49,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  32%|###1      | 35/110 [00:23<00:52,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  33%|###2      | 36/110 [00:24<00:49,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  34%|###3      | 37/110 [00:24<00:46,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  35%|###4      | 38/110 [00:25<00:46,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  35%|###5      | 39/110 [00:26<00:44,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  36%|###6      | 40/110 [00:26<00:44,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  37%|###7      | 41/110 [00:27<00:43,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  38%|###8      | 42/110 [00:28<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  39%|###9      | 43/110 [00:28<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  40%|####      | 44/110 [00:29<00:43,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  41%|####      | 45/110 [00:30<00:43,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  42%|####1     | 46/110 [00:30<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  43%|####2     | 47/110 [00:31<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  44%|####3     | 48/110 [00:32<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  45%|####4     | 49/110 [00:32<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  45%|####5     | 50/110 [00:33<00:40,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  46%|####6     | 51/110 [00:34<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  47%|####7     | 52/110 [00:34<00:37,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0474 ||:  48%|####8     | 53/110 [00:35<00:36,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  49%|####9     | 54/110 [00:36<00:36,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  50%|#####     | 55/110 [00:36<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  51%|#####     | 56/110 [00:37<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0474 ||:  52%|#####1    | 57/110 [00:38<00:38,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  53%|#####2    | 58/110 [00:39<00:37,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  54%|#####3    | 59/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  55%|#####4    | 60/110 [00:41<00:48,  1.02it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  55%|#####5    | 61/110 [00:42<00:42,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  56%|#####6    | 62/110 [00:42<00:37,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  57%|#####7    | 63/110 [00:43<00:35,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  58%|#####8    | 64/110 [00:43<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  59%|#####9    | 65/110 [00:44<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  60%|######    | 66/110 [00:45<00:33,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  61%|######    | 67/110 [00:46<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  62%|######1   | 68/110 [00:46<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  63%|######2   | 69/110 [00:47<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  64%|######3   | 70/110 [00:48<00:26,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  65%|######4   | 71/110 [00:48<00:25,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  65%|######5   | 72/110 [00:49<00:25,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  66%|######6   | 73/110 [00:50<00:25,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  67%|######7   | 74/110 [00:50<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  68%|######8   | 75/110 [00:51<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  69%|######9   | 76/110 [00:52<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  70%|#######   | 77/110 [00:52<00:21,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  71%|#######   | 78/110 [00:53<00:21,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  74%|#######3  | 81/110 [00:55<00:21,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0474 ||:  75%|#######4  | 82/110 [00:56<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0474 ||:  75%|#######5  | 83/110 [00:57<00:19,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  80%|########  | 88/110 [01:00<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  81%|########  | 89/110 [01:01<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  82%|########1 | 90/110 [01:01<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  83%|########2 | 91/110 [01:02<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  84%|########3 | 92/110 [01:02<00:11,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  85%|########4 | 93/110 [01:03<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  85%|########5 | 94/110 [01:04<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  86%|########6 | 95/110 [01:04<00:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  87%|########7 | 96/110 [01:05<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  88%|########8 | 97/110 [01:06<00:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0485 ||:  89%|########9 | 98/110 [01:06<00:07,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0485 ||:  90%|######### | 99/110 [01:07<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  91%|######### | 100/110 [01:08<00:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  92%|#########1| 101/110 [01:08<00:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  93%|#########2| 102/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  94%|#########3| 103/110 [01:10<00:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0485 ||:  95%|#########4| 104/110 [01:10<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  95%|#########5| 105/110 [01:11<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  96%|#########6| 106/110 [01:12<00:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  97%|#########7| 107/110 [01:12<00:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||:  98%|#########8| 108/110 [01:13<00:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  99%|#########9| 109/110 [01:14<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||: 100%|##########| 110/110 [01:14<00:00,  1.77it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7126, acc: 0.6562, no_result: 0.0938, loss: 1.0712 ||:   4%|4         | 1/24 [00:00<00:08,  2.69it/s]
+BLEU: 0.6733, acc: 0.6875, no_result: 0.0938, loss: 0.9854 ||:   8%|8         | 2/24 [00:00<00:07,  2.81it/s]
+BLEU: 0.6569, acc: 0.6771, no_result: 0.1146, loss: 1.1314 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.6843, acc: 0.6797, no_result: 0.1172, loss: 1.0560 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7126, acc: 0.6937, no_result: 0.1125, loss: 1.0186 ||:  21%|##        | 5/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7122, acc: 0.6823, no_result: 0.1094, loss: 1.0165 ||:  25%|##5       | 6/24 [00:02<00:06,  2.82it/s]
+BLEU: 0.7214, acc: 0.6920, no_result: 0.1027, loss: 1.0017 ||:  29%|##9       | 7/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7204, acc: 0.6992, no_result: 0.1016, loss: 1.0316 ||:  33%|###3      | 8/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.7131, acc: 0.7049, no_result: 0.0972, loss: 1.0237 ||:  38%|###7      | 9/24 [00:03<00:05,  2.93it/s]
+BLEU: 0.7249, acc: 0.6875, no_result: 0.0938, loss: 1.0439 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.7248, acc: 0.6705, no_result: 0.0994, loss: 1.0911 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7211, acc: 0.6719, no_result: 0.0938, loss: 1.1036 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7193, acc: 0.6707, no_result: 0.0986, loss: 1.1217 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.68it/s]
+BLEU: 0.7188, acc: 0.6562, no_result: 0.0982, loss: 1.1681 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.51it/s]
+BLEU: 0.7077, acc: 0.6354, no_result: 0.1229, loss: 1.2284 ||:  62%|######2   | 15/24 [00:05<00:03,  2.34it/s]
+BLEU: 0.7076, acc: 0.6328, no_result: 0.1270, loss: 1.2517 ||:  67%|######6   | 16/24 [00:06<00:03,  2.32it/s]
+BLEU: 0.7055, acc: 0.6232, no_result: 0.1342, loss: 1.2447 ||:  71%|#######   | 17/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7014, acc: 0.6198, no_result: 0.1389, loss: 1.2567 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.39it/s]
+BLEU: 0.7041, acc: 0.6250, no_result: 0.1349, loss: 1.2369 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.50it/s]
+BLEU: 0.7058, acc: 0.6281, no_result: 0.1313, loss: 1.2272 ||:  83%|########3 | 20/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7011, acc: 0.6265, no_result: 0.1265, loss: 1.2325 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7048, acc: 0.6222, no_result: 0.1236, loss: 1.2332 ||:  92%|#########1| 22/24 [00:08<00:00,  2.61it/s]
+BLEU: 0.7076, acc: 0.6304, no_result: 0.1182, loss: 1.2107 ||:  96%|#########5| 23/24 [00:08<00:00,  2.70it/s]
+BLEU: 0.7074, acc: 0.6273, no_result: 0.1179, loss: 1.2261 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7074, acc: 0.6273, no_result: 0.1179, loss: 1.2261 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0449 ||:   1%|          | 1/110 [00:00<01:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0392 ||:   2%|1         | 2/110 [00:01<01:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:   3%|2         | 3/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0405 ||:   4%|3         | 4/110 [00:02<01:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0529 ||:   5%|4         | 5/110 [00:03<01:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:   5%|5         | 6/110 [00:04<01:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:   6%|6         | 7/110 [00:04<01:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:   7%|7         | 8/110 [00:05<01:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0490 ||:   8%|8         | 9/110 [00:06<01:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0495 ||:   9%|9         | 10/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  10%|#         | 11/110 [00:07<01:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  12%|#1        | 13/110 [00:08<01:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||:  13%|#2        | 14/110 [00:09<01:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0495 ||:  15%|#4        | 16/110 [00:11<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0505 ||:  15%|#5        | 17/110 [00:11<01:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0502 ||:  16%|#6        | 18/110 [00:12<00:59,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0503 ||:  17%|#7        | 19/110 [00:12<00:59,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0506 ||:  18%|#8        | 20/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0493 ||:  19%|#9        | 21/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0491 ||:  20%|##        | 22/110 [00:14<00:58,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0497 ||:  21%|##        | 23/110 [00:15<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0497 ||:  22%|##1       | 24/110 [00:16<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  23%|##2       | 25/110 [00:16<00:59,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  24%|##3       | 26/110 [00:17<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0498 ||:  25%|##4       | 27/110 [00:18<00:55,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0499 ||:  25%|##5       | 28/110 [00:18<00:52,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0492 ||:  26%|##6       | 29/110 [00:19<00:50,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0486 ||:  27%|##7       | 30/110 [00:20<00:52,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0489 ||:  28%|##8       | 31/110 [00:20<00:52,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0490 ||:  29%|##9       | 32/110 [00:21<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0487 ||:  30%|###       | 33/110 [00:22<00:50,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  31%|###       | 34/110 [00:22<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0491 ||:  32%|###1      | 35/110 [00:23<00:47,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0498 ||:  33%|###2      | 36/110 [00:24<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0494 ||:  34%|###3      | 37/110 [00:24<00:45,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0493 ||:  35%|###4      | 38/110 [00:25<00:47,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0491 ||:  35%|###5      | 39/110 [00:25<00:44,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  36%|###6      | 40/110 [00:26<00:49,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  37%|###7      | 41/110 [00:27<00:47,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0485 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  39%|###9      | 43/110 [00:28<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  40%|####      | 44/110 [00:29<00:43,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0474 ||:  41%|####      | 45/110 [00:30<00:45,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  42%|####1     | 46/110 [00:30<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  43%|####2     | 47/110 [00:31<00:41,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  44%|####3     | 48/110 [00:32<00:39,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  45%|####4     | 49/110 [00:32<00:38,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  45%|####5     | 50/110 [00:34<00:52,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  46%|####6     | 51/110 [00:34<00:48,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  47%|####7     | 52/110 [00:35<00:45,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  48%|####8     | 53/110 [00:36<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  49%|####9     | 54/110 [00:36<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  50%|#####     | 55/110 [00:37<00:39,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0467 ||:  51%|#####     | 56/110 [00:38<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  52%|#####1    | 57/110 [00:38<00:38,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  58%|#####8    | 64/110 [00:43<00:34,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  59%|#####9    | 65/110 [00:44<00:32,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  60%|######    | 66/110 [00:45<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  61%|######    | 67/110 [00:45<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:  63%|######2   | 69/110 [00:47<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  64%|######3   | 70/110 [00:48<00:29,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  65%|######4   | 71/110 [00:48<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:  65%|######5   | 72/110 [00:49<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  66%|######6   | 73/110 [00:50<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  67%|######7   | 74/110 [00:50<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  68%|######8   | 75/110 [00:51<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  69%|######9   | 76/110 [00:52<00:23,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  70%|#######   | 77/110 [00:52<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  71%|#######   | 78/110 [00:53<00:21,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||:  75%|#######4  | 82/110 [00:55<00:17,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  76%|#######6  | 84/110 [00:57<00:16,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  80%|########  | 88/110 [00:59<00:14,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  81%|########  | 89/110 [01:00<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  82%|########1 | 90/110 [01:01<00:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  83%|########2 | 91/110 [01:01<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  84%|########3 | 92/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  85%|########4 | 93/110 [01:03<00:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  85%|########5 | 94/110 [01:03<00:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  86%|########6 | 95/110 [01:04<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  87%|########7 | 96/110 [01:05<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  88%|########8 | 97/110 [01:05<00:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  89%|########9 | 98/110 [01:06<00:07,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  90%|######### | 99/110 [01:07<00:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  91%|######### | 100/110 [01:07<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  92%|#########1| 101/110 [01:08<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  93%|#########2| 102/110 [01:09<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  94%|#########3| 103/110 [01:09<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  95%|#########4| 104/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  95%|#########5| 105/110 [01:11<00:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  96%|#########6| 106/110 [01:11<00:02,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  97%|#########7| 107/110 [01:12<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  98%|#########8| 108/110 [01:13<00:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  99%|#########9| 109/110 [01:13<00:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||: 100%|##########| 110/110 [01:14<00:00,  1.78it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6925, acc: 0.6562, no_result: 0.0312, loss: 1.1396 ||:   4%|4         | 1/24 [00:00<00:08,  2.79it/s]
+BLEU: 0.6680, acc: 0.6875, no_result: 0.0469, loss: 1.0651 ||:   8%|8         | 2/24 [00:00<00:07,  2.84it/s]
+BLEU: 0.6619, acc: 0.6771, no_result: 0.1042, loss: 1.1869 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.6966, acc: 0.6875, no_result: 0.1094, loss: 1.0901 ||:  17%|#6        | 4/24 [00:01<00:07,  2.76it/s]
+BLEU: 0.7229, acc: 0.7063, no_result: 0.1062, loss: 1.0504 ||:  21%|##        | 5/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7218, acc: 0.6771, no_result: 0.1094, loss: 1.0423 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.7258, acc: 0.6875, no_result: 0.1027, loss: 1.0325 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7265, acc: 0.6914, no_result: 0.1016, loss: 1.0524 ||:  33%|###3      | 8/24 [00:02<00:05,  2.86it/s]
+BLEU: 0.7121, acc: 0.6979, no_result: 0.0972, loss: 1.0402 ||:  38%|###7      | 9/24 [00:03<00:05,  2.81it/s]
+BLEU: 0.7231, acc: 0.6875, no_result: 0.0969, loss: 1.0545 ||:  42%|####1     | 10/24 [00:03<00:05,  2.73it/s]
+BLEU: 0.7248, acc: 0.6648, no_result: 0.1023, loss: 1.0983 ||:  46%|####5     | 11/24 [00:03<00:04,  2.63it/s]
+BLEU: 0.7206, acc: 0.6667, no_result: 0.0964, loss: 1.1121 ||:  50%|#####     | 12/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7203, acc: 0.6659, no_result: 0.0986, loss: 1.1270 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.64it/s]
+BLEU: 0.7197, acc: 0.6540, no_result: 0.1004, loss: 1.1741 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.57it/s]
+BLEU: 0.7098, acc: 0.6354, no_result: 0.1208, loss: 1.2346 ||:  62%|######2   | 15/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7079, acc: 0.6270, no_result: 0.1250, loss: 1.2552 ||:  67%|######6   | 16/24 [00:06<00:03,  2.35it/s]
+BLEU: 0.7056, acc: 0.6195, no_result: 0.1305, loss: 1.2456 ||:  71%|#######   | 17/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.6992, acc: 0.6128, no_result: 0.1337, loss: 1.2529 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7026, acc: 0.6201, no_result: 0.1299, loss: 1.2325 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7051, acc: 0.6234, no_result: 0.1266, loss: 1.2219 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7019, acc: 0.6190, no_result: 0.1235, loss: 1.2288 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7075, acc: 0.6136, no_result: 0.1207, loss: 1.2238 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7099, acc: 0.6223, no_result: 0.1168, loss: 1.2028 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7095, acc: 0.6195, no_result: 0.1166, loss: 1.2262 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7095, acc: 0.6195, no_result: 0.1166, loss: 1.2262 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:   1%|          | 1/110 [00:00<01:29,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0377 ||:   2%|1         | 2/110 [00:01<01:34,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0407 ||:   3%|2         | 3/110 [00:02<01:25,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0453 ||:   4%|3         | 4/110 [00:03<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0457 ||:   5%|4         | 5/110 [00:03<01:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0490 ||:   5%|5         | 6/110 [00:04<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:   6%|6         | 7/110 [00:04<01:06,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0456 ||:   7%|7         | 8/110 [00:05<01:02,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0456 ||:   8%|8         | 9/110 [00:06<01:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0444 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  10%|#         | 11/110 [00:07<01:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  11%|#         | 12/110 [00:08<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0448 ||:  12%|#1        | 13/110 [00:08<01:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0452 ||:  13%|#2        | 14/110 [00:09<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  14%|#3        | 15/110 [00:10<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0453 ||:  15%|#4        | 16/110 [00:10<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  15%|#5        | 17/110 [00:11<01:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  16%|#6        | 18/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  17%|#7        | 19/110 [00:13<01:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0453 ||:  18%|#8        | 20/110 [00:13<01:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  19%|#9        | 21/110 [00:14<01:03,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0467 ||:  20%|##        | 22/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  21%|##        | 23/110 [00:15<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0480 ||:  22%|##1       | 24/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  23%|##2       | 25/110 [00:17<00:59,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0488 ||:  24%|##3       | 26/110 [00:17<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  25%|##4       | 27/110 [00:18<00:55,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  25%|##5       | 28/110 [00:19<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0484 ||:  26%|##6       | 29/110 [00:19<00:52,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  27%|##7       | 30/110 [00:20<00:53,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0483 ||:  28%|##8       | 31/110 [00:21<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  29%|##9       | 32/110 [00:21<00:52,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:  30%|###       | 33/110 [00:22<00:50,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  31%|###       | 34/110 [00:23<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  32%|###1      | 35/110 [00:23<00:48,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0467 ||:  33%|###2      | 36/110 [00:24<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  34%|###3      | 37/110 [00:25<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  35%|###4      | 38/110 [00:25<00:44,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  35%|###5      | 39/110 [00:26<00:43,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  36%|###6      | 40/110 [00:27<01:00,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  37%|###7      | 41/110 [00:28<00:55,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  38%|###8      | 42/110 [00:28<00:51,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  39%|###9      | 43/110 [00:29<00:48,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  40%|####      | 44/110 [00:30<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  41%|####      | 45/110 [00:30<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0476 ||:  42%|####1     | 46/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  43%|####2     | 47/110 [00:32<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  44%|####3     | 48/110 [00:32<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0477 ||:  45%|####4     | 49/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0479 ||:  45%|####5     | 50/110 [00:34<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  46%|####6     | 51/110 [00:34<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  47%|####7     | 52/110 [00:35<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  48%|####8     | 53/110 [00:36<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0469 ||:  49%|####9     | 54/110 [00:36<00:36,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0473 ||:  50%|#####     | 55/110 [00:37<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0474 ||:  51%|#####     | 56/110 [00:38<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  52%|#####1    | 57/110 [00:38<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0467 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  60%|######    | 66/110 [00:45<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0467 ||:  61%|######    | 67/110 [00:45<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  62%|######1   | 68/110 [00:46<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  63%|######2   | 69/110 [00:47<00:31,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0460 ||:  64%|######3   | 70/110 [00:47<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  65%|######4   | 71/110 [00:48<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0457 ||:  65%|######5   | 72/110 [00:49<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0456 ||:  66%|######6   | 73/110 [00:49<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0455 ||:  67%|######7   | 74/110 [00:50<00:24,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0457 ||:  68%|######8   | 75/110 [00:51<00:22,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  69%|######9   | 76/110 [00:51<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0459 ||:  70%|#######   | 77/110 [00:52<00:22,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||:  71%|#######   | 78/110 [00:53<00:22,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0459 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0460 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0460 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0459 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0460 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0459 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0458 ||:  80%|########  | 88/110 [01:00<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0457 ||:  81%|########  | 89/110 [01:00<00:13,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||:  82%|########1 | 90/110 [01:01<00:12,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:  83%|########2 | 91/110 [01:01<00:11,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  84%|########3 | 92/110 [01:02<00:11,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  85%|########4 | 93/110 [01:03<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  85%|########5 | 94/110 [01:03<00:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  86%|########6 | 95/110 [01:04<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  87%|########7 | 96/110 [01:05<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:  88%|########8 | 97/110 [01:05<00:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  89%|########9 | 98/110 [01:06<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  90%|######### | 99/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  91%|######### | 100/110 [01:07<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0464 ||:  92%|#########1| 101/110 [01:08<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0463 ||:  93%|#########2| 102/110 [01:09<00:05,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:  94%|#########3| 103/110 [01:09<00:04,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||:  95%|#########4| 104/110 [01:10<00:03,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:  95%|#########5| 105/110 [01:11<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||:  96%|#########6| 106/110 [01:11<00:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:  97%|#########7| 107/110 [01:12<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0460 ||:  98%|#########8| 108/110 [01:12<00:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||:  99%|#########9| 109/110 [01:13<00:00,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||: 100%|##########| 110/110 [01:13<00:00,  1.89it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0461 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.7500, no_result: 0.0312, loss: 1.0500 ||:   4%|4         | 1/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.6943, acc: 0.7188, no_result: 0.0469, loss: 1.0125 ||:   8%|8         | 2/24 [00:00<00:07,  2.80it/s]
+BLEU: 0.6726, acc: 0.6875, no_result: 0.1042, loss: 1.1629 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.7016, acc: 0.6875, no_result: 0.1016, loss: 1.0886 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7273, acc: 0.7125, no_result: 0.1000, loss: 1.0532 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7273, acc: 0.6927, no_result: 0.0990, loss: 1.0491 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7328, acc: 0.7009, no_result: 0.0938, loss: 1.0326 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7293, acc: 0.6992, no_result: 0.0977, loss: 1.0616 ||:  33%|###3      | 8/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7160, acc: 0.7049, no_result: 0.0903, loss: 1.0501 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7282, acc: 0.6906, no_result: 0.0844, loss: 1.0650 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7238, acc: 0.6761, no_result: 0.0852, loss: 1.1121 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7195, acc: 0.6771, no_result: 0.0859, loss: 1.1226 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7192, acc: 0.6755, no_result: 0.0865, loss: 1.1419 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7183, acc: 0.6585, no_result: 0.0893, loss: 1.1919 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.7060, acc: 0.6396, no_result: 0.1104, loss: 1.2502 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.7068, acc: 0.6367, no_result: 0.1133, loss: 1.2763 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.7023, acc: 0.6287, no_result: 0.1213, loss: 1.2672 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.6998, acc: 0.6302, no_result: 0.1233, loss: 1.2817 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7030, acc: 0.6365, no_result: 0.1201, loss: 1.2594 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7044, acc: 0.6391, no_result: 0.1172, loss: 1.2474 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7009, acc: 0.6354, no_result: 0.1131, loss: 1.2555 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7049, acc: 0.6250, no_result: 0.1122, loss: 1.2515 ||:  92%|#########1| 22/24 [00:08<00:00,  2.59it/s]
+BLEU: 0.7076, acc: 0.6332, no_result: 0.1087, loss: 1.2293 ||:  96%|#########5| 23/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.7075, acc: 0.6345, no_result: 0.1088, loss: 1.2476 ||: 100%|##########| 24/24 [00:08<00:00,  3.32it/s]
+BLEU: 0.7075, acc: 0.6345, no_result: 0.1088, loss: 1.2476 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:   1%|          | 1/110 [00:00<01:10,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:   2%|1         | 2/110 [00:01<01:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0379 ||:   3%|2         | 3/110 [00:01<01:05,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0449 ||:   4%|3         | 4/110 [00:02<01:02,  1.69it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:   5%|4         | 5/110 [00:03<01:05,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0462 ||:   5%|5         | 6/110 [00:03<01:03,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:   6%|6         | 7/110 [00:04<01:02,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0455 ||:   7%|7         | 8/110 [00:05<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0447 ||:   8%|8         | 9/110 [00:05<01:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:   9%|9         | 10/110 [00:06<01:12,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  10%|#         | 11/110 [00:07<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0420 ||:  11%|#         | 12/110 [00:07<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0424 ||:  12%|#1        | 13/110 [00:08<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0415 ||:  13%|#2        | 14/110 [00:09<01:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0410 ||:  14%|#3        | 15/110 [00:09<01:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  15%|#4        | 16/110 [00:10<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  15%|#5        | 17/110 [00:11<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0423 ||:  16%|#6        | 18/110 [00:11<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0415 ||:  17%|#7        | 19/110 [00:12<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0412 ||:  18%|#8        | 20/110 [00:13<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0407 ||:  19%|#9        | 21/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0408 ||:  20%|##        | 22/110 [00:14<01:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0405 ||:  21%|##        | 23/110 [00:15<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0418 ||:  22%|##1       | 24/110 [00:16<00:58,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0417 ||:  23%|##2       | 25/110 [00:16<00:59,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0419 ||:  24%|##3       | 26/110 [00:17<00:56,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0418 ||:  25%|##4       | 27/110 [00:18<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0415 ||:  25%|##5       | 28/110 [00:18<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0415 ||:  26%|##6       | 29/110 [00:19<00:51,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0424 ||:  27%|##7       | 30/110 [00:20<01:10,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  28%|##8       | 31/110 [00:21<01:04,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  29%|##9       | 32/110 [00:22<00:58,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0427 ||:  30%|###       | 33/110 [00:22<00:55,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  31%|###       | 34/110 [00:23<00:53,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0422 ||:  32%|###1      | 35/110 [00:24<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0423 ||:  33%|###2      | 36/110 [00:24<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0421 ||:  34%|###3      | 37/110 [00:25<00:48,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0421 ||:  35%|###4      | 38/110 [00:25<00:45,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0419 ||:  35%|###5      | 39/110 [00:26<00:47,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0417 ||:  36%|###6      | 40/110 [00:27<00:46,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  37%|###7      | 41/110 [00:27<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  38%|###8      | 42/110 [00:28<00:46,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0427 ||:  39%|###9      | 43/110 [00:29<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  40%|####      | 44/110 [00:30<00:45,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0426 ||:  41%|####      | 45/110 [00:30<00:44,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  42%|####1     | 46/110 [00:31<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0423 ||:  43%|####2     | 47/110 [00:31<00:40,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0422 ||:  44%|####3     | 48/110 [00:32<00:39,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  45%|####4     | 49/110 [00:33<00:39,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  45%|####5     | 50/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  46%|####6     | 51/110 [00:34<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  47%|####7     | 52/110 [00:35<00:39,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0424 ||:  48%|####8     | 53/110 [00:36<00:40,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  49%|####9     | 54/110 [00:36<00:38,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  50%|#####     | 55/110 [00:37<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0427 ||:  51%|#####     | 56/110 [00:38<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  52%|#####1    | 57/110 [00:38<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0424 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  54%|#####3    | 59/110 [00:40<00:39,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  55%|#####4    | 60/110 [00:41<00:38,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  55%|#####5    | 61/110 [00:41<00:37,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  56%|#####6    | 62/110 [00:42<00:35,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  57%|#####7    | 63/110 [00:43<00:34,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  58%|#####8    | 64/110 [00:44<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  59%|#####9    | 65/110 [00:44<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  60%|######    | 66/110 [00:45<00:32,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  61%|######    | 67/110 [00:46<00:31,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  63%|######2   | 69/110 [00:47<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  64%|######3   | 70/110 [00:48<00:28,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  65%|######4   | 71/110 [00:49<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  65%|######5   | 72/110 [00:49<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  66%|######6   | 73/110 [00:50<00:26,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  67%|######7   | 74/110 [00:51<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  68%|######8   | 75/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  69%|######9   | 76/110 [00:52<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  70%|#######   | 77/110 [00:53<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  71%|#######   | 78/110 [00:53<00:22,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  72%|#######1  | 79/110 [00:54<00:23,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  73%|#######2  | 80/110 [00:55<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  74%|#######3  | 81/110 [00:56<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0438 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0439 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  77%|#######7  | 85/110 [00:58<00:18,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  78%|#######8  | 86/110 [00:59<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  79%|#######9  | 87/110 [01:00<00:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  80%|########  | 88/110 [01:00<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0439 ||:  81%|########  | 89/110 [01:01<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  82%|########1 | 90/110 [01:02<00:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  83%|########2 | 91/110 [01:02<00:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  84%|########3 | 92/110 [01:03<00:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  85%|########4 | 93/110 [01:04<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  85%|########5 | 94/110 [01:05<00:11,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  86%|########6 | 95/110 [01:05<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  87%|########7 | 96/110 [01:06<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  88%|########8 | 97/110 [01:07<00:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  89%|########9 | 98/110 [01:07<00:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  90%|######### | 99/110 [01:08<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  91%|######### | 100/110 [01:09<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  92%|#########1| 101/110 [01:10<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  93%|#########2| 102/110 [01:10<00:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0440 ||:  94%|#########3| 103/110 [01:11<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0438 ||:  95%|#########4| 104/110 [01:12<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  95%|#########5| 105/110 [01:12<00:03,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  96%|#########6| 106/110 [01:13<00:02,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  97%|#########7| 107/110 [01:14<00:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  98%|#########8| 108/110 [01:15<00:01,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  99%|#########9| 109/110 [01:15<00:00,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||: 100%|##########| 110/110 [01:16<00:00,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||: 100%|##########| 110/110 [01:16<00:00,  1.44it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7204, acc: 0.7500, no_result: 0.0312, loss: 1.0918 ||:   4%|4         | 1/24 [00:00<00:08,  2.69it/s]
+BLEU: 0.6922, acc: 0.7031, no_result: 0.0625, loss: 1.0400 ||:   8%|8         | 2/24 [00:00<00:07,  2.78it/s]
+BLEU: 0.6735, acc: 0.6667, no_result: 0.1042, loss: 1.1673 ||:  12%|#2        | 3/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.7022, acc: 0.6641, no_result: 0.1172, loss: 1.0946 ||:  17%|#6        | 4/24 [00:01<00:07,  2.72it/s]
+BLEU: 0.7265, acc: 0.6937, no_result: 0.1062, loss: 1.0481 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7237, acc: 0.6667, no_result: 0.1094, loss: 1.0456 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.7295, acc: 0.6786, no_result: 0.1027, loss: 1.0276 ||:  29%|##9       | 7/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7313, acc: 0.6836, no_result: 0.1055, loss: 1.0527 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7168, acc: 0.6771, no_result: 0.1076, loss: 1.0466 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7259, acc: 0.6656, no_result: 0.1000, loss: 1.0617 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7267, acc: 0.6562, no_result: 0.1023, loss: 1.1075 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7228, acc: 0.6589, no_result: 0.0990, loss: 1.1191 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7184, acc: 0.6538, no_result: 0.1082, loss: 1.1387 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.7198, acc: 0.6384, no_result: 0.1116, loss: 1.1829 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7081, acc: 0.6208, no_result: 0.1313, loss: 1.2409 ||:  62%|######2   | 15/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.7055, acc: 0.6211, no_result: 0.1348, loss: 1.2687 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7037, acc: 0.6158, no_result: 0.1415, loss: 1.2610 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.7018, acc: 0.6146, no_result: 0.1441, loss: 1.2759 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7052, acc: 0.6217, no_result: 0.1398, loss: 1.2540 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7064, acc: 0.6234, no_result: 0.1359, loss: 1.2441 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7058, acc: 0.6220, no_result: 0.1310, loss: 1.2521 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7117, acc: 0.6165, no_result: 0.1278, loss: 1.2484 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.7152, acc: 0.6236, no_result: 0.1250, loss: 1.2267 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7142, acc: 0.6208, no_result: 0.1244, loss: 1.2457 ||: 100%|##########| 24/24 [00:08<00:00,  3.35it/s]
+BLEU: 0.7142, acc: 0.6208, no_result: 0.1244, loss: 1.2457 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0532 ||:   1%|          | 1/110 [00:00<01:38,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0402 ||:   2%|1         | 2/110 [00:01<01:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0415 ||:   3%|2         | 3/110 [00:02<01:20,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0427 ||:   4%|3         | 4/110 [00:03<01:19,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:   5%|4         | 5/110 [00:03<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0465 ||:   5%|5         | 6/110 [00:04<01:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:   6%|6         | 7/110 [00:05<01:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0499 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0491 ||:   8%|8         | 9/110 [00:06<01:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0482 ||:   9%|9         | 10/110 [00:07<01:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0466 ||:  10%|#         | 11/110 [00:07<01:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0475 ||:  11%|#         | 12/110 [00:08<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0478 ||:  12%|#1        | 13/110 [00:09<01:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0471 ||:  14%|#3        | 15/110 [00:10<01:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0459 ||:  15%|#4        | 16/110 [00:11<01:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  15%|#5        | 17/110 [00:11<01:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0468 ||:  16%|#6        | 18/110 [00:12<01:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  17%|#7        | 19/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0472 ||:  18%|#8        | 20/110 [00:14<01:26,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0481 ||:  19%|#9        | 21/110 [00:15<01:17,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0470 ||:  20%|##        | 22/110 [00:16<01:11,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0459 ||:  21%|##        | 23/110 [00:16<01:08,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0452 ||:  22%|##1       | 24/110 [00:17<01:07,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0454 ||:  23%|##2       | 25/110 [00:18<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0446 ||:  24%|##3       | 26/110 [00:19<01:02,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0450 ||:  25%|##4       | 27/110 [00:19<01:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0455 ||:  25%|##5       | 28/110 [00:20<00:59,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0451 ||:  26%|##6       | 29/110 [00:21<00:56,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0450 ||:  27%|##7       | 30/110 [00:21<00:54,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0447 ||:  28%|##8       | 31/110 [00:22<00:53,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0442 ||:  29%|##9       | 32/110 [00:23<00:53,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0444 ||:  30%|###       | 33/110 [00:23<00:52,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0445 ||:  31%|###       | 34/110 [00:24<00:52,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0440 ||:  32%|###1      | 35/110 [00:25<00:52,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  33%|###2      | 36/110 [00:25<00:50,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0438 ||:  34%|###3      | 37/110 [00:26<00:50,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  35%|###4      | 38/110 [00:27<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  35%|###5      | 39/110 [00:28<00:48,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0442 ||:  36%|###6      | 40/110 [00:28<00:49,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0441 ||:  37%|###7      | 41/110 [00:29<00:49,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0438 ||:  38%|###8      | 42/110 [00:30<00:46,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0438 ||:  39%|###9      | 43/110 [00:30<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0438 ||:  40%|####      | 44/110 [00:31<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0441 ||:  41%|####      | 45/110 [00:32<00:44,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  42%|####1     | 46/110 [00:32<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  43%|####2     | 47/110 [00:33<00:42,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  44%|####3     | 48/110 [00:34<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0442 ||:  45%|####4     | 49/110 [00:35<00:43,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0443 ||:  45%|####5     | 50/110 [00:35<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0440 ||:  46%|####6     | 51/110 [00:36<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0442 ||:  47%|####7     | 52/110 [00:37<00:39,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0444 ||:  48%|####8     | 53/110 [00:37<00:37,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0443 ||:  49%|####9     | 54/110 [00:38<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0441 ||:  50%|#####     | 55/110 [00:38<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  51%|#####     | 56/110 [00:39<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  52%|#####1    | 57/110 [00:40<00:38,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  53%|#####2    | 58/110 [00:41<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  54%|#####3    | 59/110 [00:41<00:36,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  55%|#####4    | 60/110 [00:42<00:35,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  55%|#####5    | 61/110 [00:43<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  56%|#####6    | 62/110 [00:43<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  57%|#####7    | 63/110 [00:44<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  58%|#####8    | 64/110 [00:45<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  59%|#####9    | 65/110 [00:46<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0426 ||:  60%|######    | 66/110 [00:46<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0423 ||:  61%|######    | 67/110 [00:47<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  62%|######1   | 68/110 [00:47<00:27,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  63%|######2   | 69/110 [00:48<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  64%|######3   | 70/110 [00:49<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  65%|######4   | 71/110 [00:49<00:26,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  65%|######5   | 72/110 [00:50<00:25,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  66%|######6   | 73/110 [00:51<00:23,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  67%|######7   | 74/110 [00:51<00:22,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  68%|######8   | 75/110 [00:52<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  69%|######9   | 76/110 [00:53<00:21,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0435 ||:  70%|#######   | 77/110 [00:53<00:20,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  71%|#######   | 78/110 [00:54<00:20,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  72%|#######1  | 79/110 [00:54<00:19,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  73%|#######2  | 80/110 [00:55<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  74%|#######3  | 81/110 [00:56<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  76%|#######6  | 84/110 [00:58<00:16,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0437 ||:  77%|#######7  | 85/110 [00:59<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0436 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  79%|#######9  | 87/110 [01:00<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  80%|########  | 88/110 [01:01<00:14,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  81%|########  | 89/110 [01:01<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0434 ||:  82%|########1 | 90/110 [01:02<00:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0433 ||:  83%|########2 | 91/110 [01:03<00:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0432 ||:  84%|########3 | 92/110 [01:03<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  85%|########4 | 93/110 [01:04<00:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  85%|########5 | 94/110 [01:05<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0430 ||:  86%|########6 | 95/110 [01:05<00:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  87%|########7 | 96/110 [01:06<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  88%|########8 | 97/110 [01:07<00:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0431 ||:  89%|########9 | 98/110 [01:08<00:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  90%|######### | 99/110 [01:08<00:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0429 ||:  91%|######### | 100/110 [01:09<00:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  92%|#########1| 101/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  93%|#########2| 102/110 [01:10<00:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  94%|#########3| 103/110 [01:11<00:04,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0427 ||:  95%|#########4| 104/110 [01:12<00:04,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  95%|#########5| 105/110 [01:12<00:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0424 ||:  96%|#########6| 106/110 [01:13<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0426 ||:  97%|#########7| 107/110 [01:13<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0425 ||:  98%|#########8| 108/110 [01:14<00:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||:  99%|#########9| 109/110 [01:15<00:00,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||: 100%|##########| 110/110 [01:15<00:00,  1.84it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0428 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.6885, acc: 0.5938, no_result: 0.0938, loss: 1.1909 ||:   4%|4         | 1/24 [00:00<00:08,  2.65it/s]
+BLEU: 0.6615, acc: 0.6250, no_result: 0.0938, loss: 1.0798 ||:   8%|8         | 2/24 [00:00<00:07,  2.77it/s]
+BLEU: 0.6592, acc: 0.6250, no_result: 0.1250, loss: 1.2123 ||:  12%|#2        | 3/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.6842, acc: 0.6250, no_result: 0.1328, loss: 1.1288 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7123, acc: 0.6562, no_result: 0.1313, loss: 1.0917 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7145, acc: 0.6406, no_result: 0.1302, loss: 1.0778 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7236, acc: 0.6562, no_result: 0.1205, loss: 1.0577 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7263, acc: 0.6719, no_result: 0.1133, loss: 1.0835 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7185, acc: 0.6736, no_result: 0.1215, loss: 1.0753 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7283, acc: 0.6594, no_result: 0.1219, loss: 1.0959 ||:  42%|####1     | 10/24 [00:03<00:04,  2.82it/s]
+BLEU: 0.7252, acc: 0.6562, no_result: 0.1193, loss: 1.1493 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7231, acc: 0.6667, no_result: 0.1146, loss: 1.1580 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7206, acc: 0.6611, no_result: 0.1178, loss: 1.1745 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7219, acc: 0.6473, no_result: 0.1161, loss: 1.2251 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7140, acc: 0.6292, no_result: 0.1354, loss: 1.2814 ||:  62%|######2   | 15/24 [00:05<00:03,  2.42it/s]
+BLEU: 0.7128, acc: 0.6211, no_result: 0.1348, loss: 1.3075 ||:  67%|######6   | 16/24 [00:06<00:03,  2.38it/s]
+BLEU: 0.7132, acc: 0.6176, no_result: 0.1415, loss: 1.2967 ||:  71%|#######   | 17/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7116, acc: 0.6146, no_result: 0.1476, loss: 1.3074 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7145, acc: 0.6217, no_result: 0.1431, loss: 1.2868 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7168, acc: 0.6250, no_result: 0.1391, loss: 1.2749 ||:  83%|########3 | 20/24 [00:07<00:01,  2.62it/s]
+BLEU: 0.7142, acc: 0.6235, no_result: 0.1339, loss: 1.2798 ||:  88%|########7 | 21/24 [00:07<00:01,  2.60it/s]
+BLEU: 0.7203, acc: 0.6207, no_result: 0.1321, loss: 1.2749 ||:  92%|#########1| 22/24 [00:08<00:00,  2.50it/s]
+BLEU: 0.7233, acc: 0.6291, no_result: 0.1264, loss: 1.2523 ||:  96%|#########5| 23/24 [00:08<00:00,  2.60it/s]
+BLEU: 0.7230, acc: 0.6260, no_result: 0.1257, loss: 1.2731 ||: 100%|##########| 24/24 [00:08<00:00,  3.21it/s]
+BLEU: 0.7230, acc: 0.6260, no_result: 0.1257, loss: 1.2731 ||: 100%|##########| 24/24 [00:08<00:00,  2.71it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0423 ||:   1%|          | 1/110 [00:00<01:30,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:   2%|1         | 2/110 [00:01<01:35,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0361 ||:   3%|2         | 3/110 [00:02<01:24,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0337 ||:   4%|3         | 4/110 [00:03<01:19,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0340 ||:   5%|4         | 5/110 [00:03<01:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:   6%|6         | 7/110 [00:05<01:13,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0357 ||:   7%|7         | 8/110 [00:05<01:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0338 ||:   8%|8         | 9/110 [00:06<01:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:   9%|9         | 10/110 [00:08<01:39,  1.00it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  10%|#         | 11/110 [00:08<01:31,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0361 ||:  11%|#         | 12/110 [00:09<01:22,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0360 ||:  12%|#1        | 13/110 [00:10<01:16,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  13%|#2        | 14/110 [00:10<01:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  14%|#3        | 15/110 [00:11<01:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  15%|#4        | 16/110 [00:12<01:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  15%|#5        | 17/110 [00:12<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:  16%|#6        | 18/110 [00:13<01:08,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  17%|#7        | 19/110 [00:14<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  18%|#8        | 20/110 [00:15<01:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  19%|#9        | 21/110 [00:15<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  20%|##        | 22/110 [00:16<01:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0357 ||:  21%|##        | 23/110 [00:17<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  22%|##1       | 24/110 [00:17<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  23%|##2       | 25/110 [00:18<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  24%|##3       | 26/110 [00:18<00:52,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  25%|##4       | 27/110 [00:19<00:52,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  25%|##5       | 28/110 [00:20<00:51,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  26%|##6       | 29/110 [00:20<00:49,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0350 ||:  27%|##7       | 30/110 [00:21<00:50,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  28%|##8       | 31/110 [00:22<00:50,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  29%|##9       | 32/110 [00:22<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  30%|###       | 33/110 [00:23<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  31%|###       | 34/110 [00:24<00:50,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  32%|###1      | 35/110 [00:24<00:48,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  33%|###2      | 36/110 [00:25<00:49,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  34%|###3      | 37/110 [00:26<00:52,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  35%|###4      | 38/110 [00:27<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  35%|###5      | 39/110 [00:27<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0361 ||:  36%|###6      | 40/110 [00:28<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  37%|###7      | 41/110 [00:29<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0368 ||:  38%|###8      | 42/110 [00:29<00:48,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  39%|###9      | 43/110 [00:30<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  40%|####      | 44/110 [00:31<00:45,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0364 ||:  41%|####      | 45/110 [00:31<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  42%|####1     | 46/110 [00:32<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0368 ||:  43%|####2     | 47/110 [00:33<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  44%|####3     | 48/110 [00:33<00:43,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0365 ||:  45%|####4     | 49/110 [00:34<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0372 ||:  45%|####5     | 50/110 [00:35<00:40,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  46%|####6     | 51/110 [00:36<00:40,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  47%|####7     | 52/110 [00:36<00:39,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0368 ||:  48%|####8     | 53/110 [00:37<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  49%|####9     | 54/110 [00:38<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0365 ||:  50%|#####     | 55/110 [00:38<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  51%|#####     | 56/110 [00:39<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0363 ||:  52%|#####1    | 57/110 [00:40<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0362 ||:  53%|#####2    | 58/110 [00:40<00:36,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0363 ||:  54%|#####3    | 59/110 [00:41<00:36,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0361 ||:  55%|#####4    | 60/110 [00:42<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0365 ||:  55%|#####5    | 61/110 [00:42<00:33,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0365 ||:  56%|#####6    | 62/110 [00:43<00:34,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  57%|#####7    | 63/110 [00:44<00:34,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  58%|#####8    | 64/110 [00:45<00:33,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  59%|#####9    | 65/110 [00:45<00:31,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  60%|######    | 66/110 [00:46<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  61%|######    | 67/110 [00:47<00:33,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0368 ||:  62%|######1   | 68/110 [00:48<00:31,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  63%|######2   | 69/110 [00:49<00:32,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  64%|######3   | 70/110 [00:49<00:31,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  65%|######4   | 71/110 [00:50<00:28,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  65%|######5   | 72/110 [00:51<00:27,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  66%|######6   | 73/110 [00:51<00:25,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0370 ||:  67%|######7   | 74/110 [00:52<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0370 ||:  68%|######8   | 75/110 [00:53<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  69%|######9   | 76/110 [00:53<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  70%|#######   | 77/110 [00:54<00:22,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0370 ||:  71%|#######   | 78/110 [00:55<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  72%|#######1  | 79/110 [00:55<00:21,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0370 ||:  73%|#######2  | 80/110 [00:56<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  74%|#######3  | 81/110 [00:57<00:20,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0368 ||:  75%|#######4  | 82/110 [00:57<00:19,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0367 ||:  75%|#######5  | 83/110 [00:58<00:18,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  76%|#######6  | 84/110 [00:59<00:17,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  77%|#######7  | 85/110 [00:59<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0373 ||:  78%|#######8  | 86/110 [01:00<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0372 ||:  79%|#######9  | 87/110 [01:01<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0373 ||:  80%|########  | 88/110 [01:01<00:14,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0372 ||:  81%|########  | 89/110 [01:02<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  82%|########1 | 90/110 [01:03<00:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0370 ||:  83%|########2 | 91/110 [01:03<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  84%|########3 | 92/110 [01:04<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0369 ||:  85%|########4 | 93/110 [01:05<00:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  85%|########5 | 94/110 [01:06<00:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0373 ||:  86%|########6 | 95/110 [01:06<00:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0373 ||:  87%|########7 | 96/110 [01:07<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0372 ||:  88%|########8 | 97/110 [01:08<00:09,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  89%|########9 | 98/110 [01:08<00:08,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0371 ||:  90%|######### | 99/110 [01:09<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0373 ||:  91%|######### | 100/110 [01:10<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0373 ||:  92%|#########1| 101/110 [01:10<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0372 ||:  93%|#########2| 102/110 [01:11<00:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0374 ||:  94%|#########3| 103/110 [01:12<00:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0378 ||:  95%|#########4| 104/110 [01:12<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0380 ||:  95%|#########5| 105/110 [01:13<00:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0382 ||:  96%|#########6| 106/110 [01:14<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0381 ||:  97%|#########7| 107/110 [01:15<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0380 ||:  98%|#########8| 108/110 [01:15<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0380 ||:  99%|#########9| 109/110 [01:16<00:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0381 ||: 100%|##########| 110/110 [01:17<00:00,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0381 ||: 100%|##########| 110/110 [01:17<00:00,  1.42it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7146, acc: 0.6250, no_result: 0.0938, loss: 1.1898 ||:   4%|4         | 1/24 [00:00<00:08,  2.62it/s]
+BLEU: 0.7142, acc: 0.6562, no_result: 0.0938, loss: 1.0830 ||:   8%|8         | 2/24 [00:00<00:08,  2.74it/s]
+BLEU: 0.6897, acc: 0.6354, no_result: 0.1250, loss: 1.2180 ||:  12%|#2        | 3/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.7092, acc: 0.6484, no_result: 0.1172, loss: 1.1296 ||:  17%|#6        | 4/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7354, acc: 0.6813, no_result: 0.1125, loss: 1.0890 ||:  21%|##        | 5/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7372, acc: 0.6667, no_result: 0.1146, loss: 1.0823 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7423, acc: 0.6786, no_result: 0.1071, loss: 1.0648 ||:  29%|##9       | 7/24 [00:02<00:05,  2.88it/s]
+BLEU: 0.7427, acc: 0.6875, no_result: 0.1055, loss: 1.0823 ||:  33%|###3      | 8/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7354, acc: 0.6806, no_result: 0.1146, loss: 1.0790 ||:  38%|###7      | 9/24 [00:03<00:05,  2.82it/s]
+BLEU: 0.7426, acc: 0.6687, no_result: 0.1156, loss: 1.0940 ||:  42%|####1     | 10/24 [00:03<00:05,  2.74it/s]
+BLEU: 0.7408, acc: 0.6591, no_result: 0.1165, loss: 1.1427 ||:  46%|####5     | 11/24 [00:04<00:04,  2.63it/s]
+BLEU: 0.7365, acc: 0.6641, no_result: 0.1146, loss: 1.1535 ||:  50%|#####     | 12/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.7339, acc: 0.6611, no_result: 0.1178, loss: 1.1687 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.64it/s]
+BLEU: 0.7332, acc: 0.6518, no_result: 0.1161, loss: 1.2168 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.53it/s]
+BLEU: 0.7230, acc: 0.6292, no_result: 0.1354, loss: 1.2732 ||:  62%|######2   | 15/24 [00:05<00:03,  2.35it/s]
+BLEU: 0.7222, acc: 0.6270, no_result: 0.1367, loss: 1.2987 ||:  67%|######6   | 16/24 [00:06<00:03,  2.31it/s]
+BLEU: 0.7209, acc: 0.6213, no_result: 0.1434, loss: 1.2889 ||:  71%|#######   | 17/24 [00:06<00:02,  2.35it/s]
+BLEU: 0.7161, acc: 0.6181, no_result: 0.1476, loss: 1.2994 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.34it/s]
+BLEU: 0.7176, acc: 0.6234, no_result: 0.1431, loss: 1.2777 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.45it/s]
+BLEU: 0.7213, acc: 0.6297, no_result: 0.1391, loss: 1.2650 ||:  83%|########3 | 20/24 [00:07<00:01,  2.59it/s]
+BLEU: 0.7180, acc: 0.6280, no_result: 0.1339, loss: 1.2701 ||:  88%|########7 | 21/24 [00:08<00:01,  2.61it/s]
+BLEU: 0.7210, acc: 0.6222, no_result: 0.1307, loss: 1.2648 ||:  92%|#########1| 22/24 [00:08<00:00,  2.60it/s]
+BLEU: 0.7240, acc: 0.6304, no_result: 0.1264, loss: 1.2421 ||:  96%|#########5| 23/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.7240, acc: 0.6273, no_result: 0.1257, loss: 1.2639 ||: 100%|##########| 24/24 [00:08<00:00,  3.30it/s]
+BLEU: 0.7240, acc: 0.6273, no_result: 0.1257, loss: 1.2639 ||: 100%|##########| 24/24 [00:08<00:00,  2.68it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:   1%|          | 1/110 [00:00<01:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:   2%|1         | 2/110 [00:01<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:   3%|2         | 3/110 [00:02<01:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:   5%|4         | 5/110 [00:03<01:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:   5%|5         | 6/110 [00:04<01:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:   6%|6         | 7/110 [00:04<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:   7%|7         | 8/110 [00:05<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0331 ||:   8%|8         | 9/110 [00:06<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:   9%|9         | 10/110 [00:06<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0338 ||:  10%|#         | 11/110 [00:07<01:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  11%|#         | 12/110 [00:07<01:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0353 ||:  12%|#1        | 13/110 [00:08<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  13%|#2        | 14/110 [00:09<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  14%|#3        | 15/110 [00:10<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  15%|#4        | 16/110 [00:10<01:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0385 ||:  15%|#5        | 17/110 [00:11<01:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0380 ||:  16%|#6        | 18/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0379 ||:  17%|#7        | 19/110 [00:12<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0375 ||:  18%|#8        | 20/110 [00:13<01:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:  19%|#9        | 21/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0363 ||:  20%|##        | 22/110 [00:14<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  21%|##        | 23/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0365 ||:  22%|##1       | 24/110 [00:16<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0361 ||:  23%|##2       | 25/110 [00:16<00:56,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  24%|##3       | 26/110 [00:17<00:54,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  25%|##4       | 27/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  25%|##5       | 28/110 [00:18<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  26%|##6       | 29/110 [00:19<00:51,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  27%|##7       | 30/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  28%|##8       | 31/110 [00:20<00:54,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0340 ||:  29%|##9       | 32/110 [00:21<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0337 ||:  30%|###       | 33/110 [00:22<00:52,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  31%|###       | 34/110 [00:22<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  32%|###1      | 35/110 [00:23<00:53,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:  33%|###2      | 36/110 [00:24<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0338 ||:  34%|###3      | 37/110 [00:25<00:52,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  35%|###4      | 38/110 [00:25<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0332 ||:  35%|###5      | 39/110 [00:26<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  36%|###6      | 40/110 [00:27<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  37%|###7      | 41/110 [00:27<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0328 ||:  38%|###8      | 42/110 [00:28<00:46,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0330 ||:  39%|###9      | 43/110 [00:29<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  40%|####      | 44/110 [00:30<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0337 ||:  41%|####      | 45/110 [00:30<00:45,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  42%|####1     | 46/110 [00:31<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  43%|####2     | 47/110 [00:32<00:43,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  44%|####3     | 48/110 [00:32<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0332 ||:  45%|####4     | 49/110 [00:33<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0331 ||:  45%|####5     | 50/110 [00:34<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0330 ||:  46%|####6     | 51/110 [00:34<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0331 ||:  47%|####7     | 52/110 [00:35<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  48%|####8     | 53/110 [00:36<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  49%|####9     | 54/110 [00:36<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  50%|#####     | 55/110 [00:37<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0337 ||:  51%|#####     | 56/110 [00:38<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  52%|#####1    | 57/110 [00:39<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0339 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0338 ||:  54%|#####3    | 59/110 [00:40<00:37,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0340 ||:  55%|#####4    | 60/110 [00:41<00:40,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  55%|#####5    | 61/110 [00:42<00:36,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  58%|#####8    | 64/110 [00:44<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  60%|######    | 66/110 [00:45<00:29,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  61%|######    | 67/110 [00:46<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  62%|######1   | 68/110 [00:46<00:30,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  63%|######2   | 69/110 [00:47<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  64%|######3   | 70/110 [00:48<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  65%|######4   | 71/110 [00:48<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  65%|######5   | 72/110 [00:49<00:25,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  66%|######6   | 73/110 [00:50<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  67%|######7   | 74/110 [00:50<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  68%|######8   | 75/110 [00:51<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  69%|######9   | 76/110 [00:52<00:26,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  70%|#######   | 77/110 [00:53<00:24,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  71%|#######   | 78/110 [00:54<00:23,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  73%|#######2  | 80/110 [00:55<00:20,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  74%|#######3  | 81/110 [00:56<00:20,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0350 ||:  75%|#######5  | 83/110 [00:57<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  76%|#######6  | 84/110 [00:58<00:18,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  78%|#######8  | 86/110 [00:59<00:15,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  80%|########  | 88/110 [01:00<00:14,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  81%|########  | 89/110 [01:01<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  82%|########1 | 90/110 [01:02<00:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  83%|########2 | 91/110 [01:02<00:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  84%|########3 | 92/110 [01:03<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  85%|########4 | 93/110 [01:03<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0357 ||:  85%|########5 | 94/110 [01:04<00:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  86%|########6 | 95/110 [01:05<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  87%|########7 | 96/110 [01:05<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  88%|########8 | 97/110 [01:06<00:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  89%|########9 | 98/110 [01:07<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  90%|######### | 99/110 [01:08<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  91%|######### | 100/110 [01:09<00:09,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  92%|#########1| 101/110 [01:10<00:07,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  93%|#########2| 102/110 [01:10<00:06,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  94%|#########3| 103/110 [01:11<00:05,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0360 ||:  95%|#########4| 104/110 [01:12<00:04,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  95%|#########5| 105/110 [01:12<00:03,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  96%|#########6| 106/110 [01:13<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  97%|#########7| 107/110 [01:14<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  98%|#########8| 108/110 [01:15<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0360 ||:  99%|#########9| 109/110 [01:15<00:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||: 100%|##########| 110/110 [01:16<00:00,  1.72it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||: 100%|##########| 110/110 [01:16<00:00,  1.45it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7204, acc: 0.6562, no_result: 0.1250, loss: 1.1899 ||:   4%|4         | 1/24 [00:00<00:09,  2.55it/s]
+BLEU: 0.7227, acc: 0.7031, no_result: 0.0938, loss: 1.0796 ||:   8%|8         | 2/24 [00:00<00:08,  2.68it/s]
+BLEU: 0.6948, acc: 0.6667, no_result: 0.1146, loss: 1.2035 ||:  12%|#2        | 3/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7143, acc: 0.6719, no_result: 0.1094, loss: 1.1210 ||:  17%|#6        | 4/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7384, acc: 0.6937, no_result: 0.1062, loss: 1.0872 ||:  21%|##        | 5/24 [00:01<00:07,  2.62it/s]
+BLEU: 0.7374, acc: 0.6667, no_result: 0.1094, loss: 1.0887 ||:  25%|##5       | 6/24 [00:02<00:06,  2.73it/s]
+BLEU: 0.7470, acc: 0.6786, no_result: 0.1027, loss: 1.0681 ||:  29%|##9       | 7/24 [00:02<00:05,  2.86it/s]
+BLEU: 0.7461, acc: 0.6914, no_result: 0.0977, loss: 1.0864 ||:  33%|###3      | 8/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7357, acc: 0.6944, no_result: 0.1042, loss: 1.0851 ||:  38%|###7      | 9/24 [00:03<00:05,  2.84it/s]
+BLEU: 0.7450, acc: 0.6781, no_result: 0.1031, loss: 1.0965 ||:  42%|####1     | 10/24 [00:03<00:05,  2.76it/s]
+BLEU: 0.7412, acc: 0.6676, no_result: 0.1108, loss: 1.1430 ||:  46%|####5     | 11/24 [00:04<00:04,  2.64it/s]
+BLEU: 0.7375, acc: 0.6771, no_result: 0.1042, loss: 1.1556 ||:  50%|#####     | 12/24 [00:04<00:04,  2.74it/s]
+BLEU: 0.7343, acc: 0.6707, no_result: 0.1106, loss: 1.1694 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.66it/s]
+BLEU: 0.7327, acc: 0.6585, no_result: 0.1071, loss: 1.2190 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.57it/s]
+BLEU: 0.7259, acc: 0.6375, no_result: 0.1271, loss: 1.2744 ||:  62%|######2   | 15/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7229, acc: 0.6367, no_result: 0.1289, loss: 1.2993 ||:  67%|######6   | 16/24 [00:06<00:03,  2.34it/s]
+BLEU: 0.7208, acc: 0.6305, no_result: 0.1360, loss: 1.2883 ||:  71%|#######   | 17/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7167, acc: 0.6285, no_result: 0.1424, loss: 1.2995 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.37it/s]
+BLEU: 0.7191, acc: 0.6349, no_result: 0.1382, loss: 1.2785 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.47it/s]
+BLEU: 0.7218, acc: 0.6406, no_result: 0.1344, loss: 1.2665 ||:  83%|########3 | 20/24 [00:07<00:01,  2.61it/s]
+BLEU: 0.7214, acc: 0.6384, no_result: 0.1295, loss: 1.2740 ||:  88%|########7 | 21/24 [00:08<00:01,  2.62it/s]
+BLEU: 0.7248, acc: 0.6321, no_result: 0.1264, loss: 1.2719 ||:  92%|#########1| 22/24 [00:08<00:00,  2.59it/s]
+BLEU: 0.7280, acc: 0.6386, no_result: 0.1236, loss: 1.2490 ||:  96%|#########5| 23/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.7279, acc: 0.6351, no_result: 0.1231, loss: 1.2682 ||: 100%|##########| 24/24 [00:08<00:00,  3.29it/s]
+BLEU: 0.7279, acc: 0.6351, no_result: 0.1231, loss: 1.2682 ||: 100%|##########| 24/24 [00:08<00:00,  2.69it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:   1%|          | 1/110 [00:00<01:19,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0389 ||:   2%|1         | 2/110 [00:01<01:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0363 ||:   3%|2         | 3/110 [00:01<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:   4%|3         | 4/110 [00:02<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:   5%|4         | 5/110 [00:03<01:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:   5%|5         | 6/110 [00:03<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:   6%|6         | 7/110 [00:04<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:   7%|7         | 8/110 [00:05<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:   8%|8         | 9/110 [00:06<01:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:   9%|9         | 10/110 [00:06<01:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  10%|#         | 11/110 [00:07<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  11%|#         | 12/110 [00:08<01:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  12%|#1        | 13/110 [00:08<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  13%|#2        | 14/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  14%|#3        | 15/110 [00:10<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  15%|#4        | 16/110 [00:11<01:10,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  15%|#5        | 17/110 [00:11<01:10,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  16%|#6        | 18/110 [00:12<01:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  17%|#7        | 19/110 [00:13<01:06,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  18%|#8        | 20/110 [00:13<01:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  19%|#9        | 21/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0314 ||:  20%|##        | 22/110 [00:15<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  21%|##        | 23/110 [00:15<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  22%|##1       | 24/110 [00:16<00:58,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:  23%|##2       | 25/110 [00:17<00:58,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0307 ||:  24%|##3       | 26/110 [00:18<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  25%|##4       | 27/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  25%|##5       | 28/110 [00:19<00:52,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  26%|##6       | 29/110 [00:19<00:53,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  27%|##7       | 30/110 [00:20<00:53,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  28%|##8       | 31/110 [00:21<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  29%|##9       | 32/110 [00:22<00:58,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  30%|###       | 33/110 [00:22<00:54,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||:  31%|###       | 34/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  32%|###1      | 35/110 [00:24<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  33%|###2      | 36/110 [00:24<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  34%|###3      | 37/110 [00:25<00:51,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  35%|###4      | 38/110 [00:26<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0307 ||:  35%|###5      | 39/110 [00:26<00:46,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0306 ||:  36%|###6      | 40/110 [00:27<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  37%|###7      | 41/110 [00:28<00:44,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0307 ||:  38%|###8      | 42/110 [00:28<00:46,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0307 ||:  39%|###9      | 43/110 [00:29<00:48,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:  40%|####      | 44/110 [00:30<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:  41%|####      | 45/110 [00:30<00:44,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  42%|####1     | 46/110 [00:31<00:42,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  43%|####2     | 47/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:  44%|####3     | 48/110 [00:32<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  45%|####4     | 49/110 [00:33<00:42,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  45%|####5     | 50/110 [00:34<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  46%|####6     | 51/110 [00:35<00:40,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  47%|####7     | 52/110 [00:35<00:39,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  48%|####8     | 53/110 [00:36<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0327 ||:  49%|####9     | 54/110 [00:37<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0326 ||:  50%|#####     | 55/110 [00:37<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0326 ||:  51%|#####     | 56/110 [00:38<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0325 ||:  52%|#####1    | 57/110 [00:39<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  54%|#####3    | 59/110 [00:40<00:37,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0326 ||:  55%|#####4    | 60/110 [00:41<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  55%|#####5    | 61/110 [00:42<00:35,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  58%|#####8    | 64/110 [00:44<00:30,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0325 ||:  60%|######    | 66/110 [00:45<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  61%|######    | 67/110 [00:46<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  62%|######1   | 68/110 [00:46<00:27,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  63%|######2   | 69/110 [00:47<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  64%|######3   | 70/110 [00:48<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  65%|######4   | 71/110 [00:48<00:25,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  65%|######5   | 72/110 [00:49<00:23,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  66%|######6   | 73/110 [00:49<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  67%|######7   | 74/110 [00:50<00:22,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  68%|######8   | 75/110 [00:51<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  69%|######9   | 76/110 [00:51<00:21,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  70%|#######   | 77/110 [00:52<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0332 ||:  71%|#######   | 78/110 [00:53<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0331 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  73%|#######2  | 80/110 [00:54<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0332 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0331 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0330 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0329 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0329 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0327 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0327 ||:  80%|########  | 88/110 [00:59<00:14,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0330 ||:  81%|########  | 89/110 [01:00<00:13,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0329 ||:  82%|########1 | 90/110 [01:01<00:17,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  83%|########2 | 91/110 [01:02<00:15,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  84%|########3 | 92/110 [01:03<00:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  85%|########4 | 93/110 [01:03<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0338 ||:  85%|########5 | 94/110 [01:04<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:  86%|########6 | 95/110 [01:05<00:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:  87%|########7 | 96/110 [01:05<00:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:  88%|########8 | 97/110 [01:06<00:09,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:  89%|########9 | 98/110 [01:07<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  90%|######### | 99/110 [01:08<00:07,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  91%|######### | 100/110 [01:08<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:  92%|#########1| 101/110 [01:09<00:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  93%|#########2| 102/110 [01:09<00:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  94%|#########3| 103/110 [01:10<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||:  95%|#########4| 104/110 [01:11<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  95%|#########5| 105/110 [01:12<00:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  96%|#########6| 106/110 [01:12<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  97%|#########7| 107/110 [01:13<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  98%|#########8| 108/110 [01:14<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0350 ||:  99%|#########9| 109/110 [01:15<00:00,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||: 100%|##########| 110/110 [01:15<00:00,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7323, acc: 0.6250, no_result: 0.0938, loss: 1.1441 ||:   4%|4         | 1/24 [00:00<00:08,  2.68it/s]
+BLEU: 0.7026, acc: 0.6719, no_result: 0.0938, loss: 1.0776 ||:   8%|8         | 2/24 [00:00<00:07,  2.76it/s]
+BLEU: 0.6886, acc: 0.6458, no_result: 0.1250, loss: 1.2160 ||:  12%|#2        | 3/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.7098, acc: 0.6562, no_result: 0.1172, loss: 1.1277 ||:  17%|#6        | 4/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7348, acc: 0.6813, no_result: 0.1125, loss: 1.0846 ||:  21%|##        | 5/24 [00:01<00:07,  2.66it/s]
+BLEU: 0.7345, acc: 0.6771, no_result: 0.1094, loss: 1.0854 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7417, acc: 0.6875, no_result: 0.1027, loss: 1.0651 ||:  29%|##9       | 7/24 [00:02<00:05,  2.89it/s]
+BLEU: 0.7356, acc: 0.6953, no_result: 0.1016, loss: 1.0850 ||:  33%|###3      | 8/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7275, acc: 0.7049, no_result: 0.1007, loss: 1.0793 ||:  38%|###7      | 9/24 [00:03<00:05,  2.86it/s]
+BLEU: 0.7398, acc: 0.6906, no_result: 0.1031, loss: 1.0957 ||:  42%|####1     | 10/24 [00:03<00:05,  2.78it/s]
+BLEU: 0.7368, acc: 0.6761, no_result: 0.1080, loss: 1.1468 ||:  46%|####5     | 11/24 [00:04<00:04,  2.65it/s]
+BLEU: 0.7314, acc: 0.6823, no_result: 0.1016, loss: 1.1590 ||:  50%|#####     | 12/24 [00:04<00:04,  2.75it/s]
+BLEU: 0.7305, acc: 0.6827, no_result: 0.1034, loss: 1.1814 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7311, acc: 0.6719, no_result: 0.0982, loss: 1.2333 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.57it/s]
+BLEU: 0.7202, acc: 0.6521, no_result: 0.1208, loss: 1.2912 ||:  62%|######2   | 15/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7192, acc: 0.6484, no_result: 0.1230, loss: 1.3153 ||:  67%|######6   | 16/24 [00:06<00:03,  2.35it/s]
+BLEU: 0.7177, acc: 0.6415, no_result: 0.1305, loss: 1.3037 ||:  71%|#######   | 17/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7141, acc: 0.6406, no_result: 0.1337, loss: 1.3158 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.38it/s]
+BLEU: 0.7156, acc: 0.6447, no_result: 0.1299, loss: 1.2956 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.48it/s]
+BLEU: 0.7178, acc: 0.6469, no_result: 0.1266, loss: 1.2833 ||:  83%|########3 | 20/24 [00:07<00:01,  2.63it/s]
+BLEU: 0.7140, acc: 0.6414, no_result: 0.1220, loss: 1.2912 ||:  88%|########7 | 21/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7173, acc: 0.6307, no_result: 0.1193, loss: 1.2878 ||:  92%|#########1| 22/24 [00:08<00:00,  2.61it/s]
+BLEU: 0.7201, acc: 0.6386, no_result: 0.1155, loss: 1.2643 ||:  96%|#########5| 23/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.7201, acc: 0.6398, no_result: 0.1153, loss: 1.2846 ||: 100%|##########| 24/24 [00:08<00:00,  3.32it/s]
+BLEU: 0.7201, acc: 0.6398, no_result: 0.1153, loss: 1.2846 ||: 100%|##########| 24/24 [00:08<00:00,  2.71it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:   1%|          | 1/110 [00:00<01:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:   2%|1         | 2/110 [00:01<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0392 ||:   3%|2         | 3/110 [00:01<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0360 ||:   4%|3         | 4/110 [00:02<01:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:   5%|4         | 5/110 [00:03<01:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:   5%|5         | 6/110 [00:03<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0325 ||:   6%|6         | 7/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:   7%|7         | 8/110 [00:05<01:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:   8%|8         | 9/110 [00:06<01:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:   9%|9         | 10/110 [00:06<01:10,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0307 ||:  10%|#         | 11/110 [00:07<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  11%|#         | 12/110 [00:08<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  12%|#1        | 13/110 [00:08<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0306 ||:  13%|#2        | 14/110 [00:09<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0328 ||:  14%|#3        | 15/110 [00:10<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  15%|#4        | 16/110 [00:10<01:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0328 ||:  15%|#5        | 17/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  16%|#6        | 18/110 [00:12<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  17%|#7        | 19/110 [00:12<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0314 ||:  19%|#9        | 21/110 [00:14<00:57,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  20%|##        | 22/110 [00:14<00:56,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  21%|##        | 23/110 [00:15<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  22%|##1       | 24/110 [00:16<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  23%|##2       | 25/110 [00:16<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||:  24%|##3       | 26/110 [00:17<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0313 ||:  25%|##4       | 27/110 [00:17<00:52,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:  25%|##5       | 28/110 [00:18<00:52,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  26%|##6       | 29/110 [00:19<00:50,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  27%|##7       | 30/110 [00:19<00:50,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  28%|##8       | 31/110 [00:20<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  29%|##9       | 32/110 [00:21<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  30%|###       | 33/110 [00:22<00:53,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  31%|###       | 34/110 [00:22<00:53,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  32%|###1      | 35/110 [00:23<00:50,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  33%|###2      | 36/110 [00:24<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  34%|###3      | 37/110 [00:24<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0327 ||:  35%|###4      | 38/110 [00:25<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0326 ||:  35%|###5      | 39/110 [00:26<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  36%|###6      | 40/110 [00:26<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  37%|###7      | 41/110 [00:27<00:48,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0333 ||:  38%|###8      | 42/110 [00:28<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  39%|###9      | 43/110 [00:28<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  40%|####      | 44/110 [00:29<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  41%|####      | 45/110 [00:30<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  42%|####1     | 46/110 [00:31<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  43%|####2     | 47/110 [00:31<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  44%|####3     | 48/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0334 ||:  45%|####4     | 49/110 [00:33<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0337 ||:  45%|####5     | 50/110 [00:33<00:43,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  46%|####6     | 51/110 [00:34<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0336 ||:  47%|####7     | 52/110 [00:35<00:38,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  48%|####8     | 53/110 [00:35<00:38,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0339 ||:  49%|####9     | 54/110 [00:36<00:37,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0340 ||:  50%|#####     | 55/110 [00:37<00:36,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  51%|#####     | 56/110 [00:37<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0356 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  54%|#####3    | 59/110 [00:39<00:32,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  56%|#####6    | 62/110 [00:41<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0350 ||:  57%|#####7    | 63/110 [00:42<00:33,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  59%|#####9    | 65/110 [00:43<00:30,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  60%|######    | 66/110 [00:44<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  61%|######    | 67/110 [00:45<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0357 ||:  62%|######1   | 68/110 [00:45<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  63%|######2   | 69/110 [00:46<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  64%|######3   | 70/110 [00:47<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  65%|######4   | 71/110 [00:47<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  65%|######5   | 72/110 [00:48<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0358 ||:  66%|######6   | 73/110 [00:49<00:27,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  67%|######7   | 74/110 [00:50<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  68%|######8   | 75/110 [00:50<00:24,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0359 ||:  69%|######9   | 76/110 [00:51<00:24,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0357 ||:  70%|#######   | 77/110 [00:52<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:  71%|#######   | 78/110 [00:52<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0354 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  73%|#######2  | 80/110 [00:55<00:28,  1.05it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  74%|#######3  | 81/110 [00:55<00:25,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  75%|#######4  | 82/110 [00:56<00:23,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0353 ||:  75%|#######5  | 83/110 [00:57<00:20,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0352 ||:  76%|#######6  | 84/110 [00:57<00:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0353 ||:  77%|#######7  | 85/110 [00:58<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0353 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  79%|#######9  | 87/110 [01:00<00:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  80%|########  | 88/110 [01:00<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  81%|########  | 89/110 [01:01<00:15,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0350 ||:  82%|########1 | 90/110 [01:02<00:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  83%|########2 | 91/110 [01:02<00:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  84%|########3 | 92/110 [01:03<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0351 ||:  85%|########4 | 93/110 [01:04<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  85%|########5 | 94/110 [01:04<00:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  86%|########6 | 95/110 [01:05<00:09,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  87%|########7 | 96/110 [01:05<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  88%|########8 | 97/110 [01:06<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  89%|########9 | 98/110 [01:07<00:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  90%|######### | 99/110 [01:08<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  91%|######### | 100/110 [01:08<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  92%|#########1| 101/110 [01:09<00:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  93%|#########2| 102/110 [01:10<00:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  94%|#########3| 103/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  95%|#########4| 104/110 [01:11<00:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  95%|#########5| 105/110 [01:12<00:03,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0349 ||:  96%|#########6| 106/110 [01:12<00:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0347 ||:  97%|#########7| 107/110 [01:13<00:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0346 ||:  98%|#########8| 108/110 [01:13<00:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0344 ||:  99%|#########9| 109/110 [01:14<00:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||: 100%|##########| 110/110 [01:14<00:00,  1.81it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0343 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6562, no_result: 0.1250, loss: 1.1765 ||:   4%|4         | 1/24 [00:00<00:08,  2.71it/s]
+BLEU: 0.7115, acc: 0.6719, no_result: 0.0938, loss: 1.1184 ||:   8%|8         | 2/24 [00:00<00:07,  2.78it/s]
+BLEU: 0.6920, acc: 0.6354, no_result: 0.1250, loss: 1.2295 ||:  12%|#2        | 3/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.7203, acc: 0.6484, no_result: 0.1172, loss: 1.1363 ||:  17%|#6        | 4/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7408, acc: 0.6625, no_result: 0.1250, loss: 1.1047 ||:  21%|##        | 5/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7440, acc: 0.6406, no_result: 0.1198, loss: 1.1036 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7500, acc: 0.6562, no_result: 0.1116, loss: 1.0765 ||:  29%|##9       | 7/24 [00:02<00:05,  2.88it/s]
+BLEU: 0.7429, acc: 0.6641, no_result: 0.1094, loss: 1.0957 ||:  33%|###3      | 8/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7325, acc: 0.6597, no_result: 0.1181, loss: 1.0895 ||:  38%|###7      | 9/24 [00:03<00:05,  2.86it/s]
+BLEU: 0.7416, acc: 0.6500, no_result: 0.1187, loss: 1.1037 ||:  42%|####1     | 10/24 [00:03<00:05,  2.78it/s]
+BLEU: 0.7411, acc: 0.6364, no_result: 0.1222, loss: 1.1520 ||:  46%|####5     | 11/24 [00:04<00:04,  2.65it/s]
+BLEU: 0.7359, acc: 0.6432, no_result: 0.1172, loss: 1.1664 ||:  50%|#####     | 12/24 [00:04<00:04,  2.75it/s]
+BLEU: 0.7347, acc: 0.6418, no_result: 0.1202, loss: 1.1829 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7331, acc: 0.6339, no_result: 0.1161, loss: 1.2332 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.7201, acc: 0.6188, no_result: 0.1333, loss: 1.2900 ||:  62%|######2   | 15/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.7183, acc: 0.6133, no_result: 0.1348, loss: 1.3131 ||:  67%|######6   | 16/24 [00:06<00:03,  2.34it/s]
+BLEU: 0.7155, acc: 0.6085, no_result: 0.1397, loss: 1.3012 ||:  71%|#######   | 17/24 [00:06<00:02,  2.39it/s]
+BLEU: 0.7127, acc: 0.6076, no_result: 0.1424, loss: 1.3115 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.38it/s]
+BLEU: 0.7146, acc: 0.6135, no_result: 0.1382, loss: 1.2911 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.48it/s]
+BLEU: 0.7181, acc: 0.6188, no_result: 0.1344, loss: 1.2783 ||:  83%|########3 | 20/24 [00:07<00:01,  2.62it/s]
+BLEU: 0.7155, acc: 0.6176, no_result: 0.1295, loss: 1.2838 ||:  88%|########7 | 21/24 [00:07<00:01,  2.62it/s]
+BLEU: 0.7203, acc: 0.6108, no_result: 0.1264, loss: 1.2803 ||:  92%|#########1| 22/24 [00:08<00:00,  2.59it/s]
+BLEU: 0.7242, acc: 0.6196, no_result: 0.1223, loss: 1.2579 ||:  96%|#########5| 23/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.7241, acc: 0.6169, no_result: 0.1218, loss: 1.2770 ||: 100%|##########| 24/24 [00:08<00:00,  3.31it/s]
+BLEU: 0.7241, acc: 0.6169, no_result: 0.1218, loss: 1.2770 ||: 100%|##########| 24/24 [00:08<00:00,  2.71it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:   1%|          | 1/110 [00:00<01:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:   2%|1         | 2/110 [00:01<01:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:   3%|2         | 3/110 [00:02<01:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:   4%|3         | 4/110 [00:02<01:18,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:   5%|4         | 5/110 [00:03<01:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:   6%|6         | 7/110 [00:04<01:12,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:   7%|7         | 8/110 [00:05<01:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:   8%|8         | 9/110 [00:06<01:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:   9%|9         | 10/110 [00:07<01:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  10%|#         | 11/110 [00:07<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  11%|#         | 12/110 [00:08<01:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  12%|#1        | 13/110 [00:09<01:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  13%|#2        | 14/110 [00:09<01:12,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  14%|#3        | 15/110 [00:10<01:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  15%|#4        | 16/110 [00:11<01:09,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  15%|#5        | 17/110 [00:12<01:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  16%|#6        | 18/110 [00:12<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  17%|#7        | 19/110 [00:13<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  18%|#8        | 20/110 [00:14<01:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  19%|#9        | 21/110 [00:15<01:06,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:  20%|##        | 22/110 [00:15<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:  21%|##        | 23/110 [00:16<00:57,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:  22%|##1       | 24/110 [00:16<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:  23%|##2       | 25/110 [00:17<00:58,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  24%|##3       | 26/110 [00:18<00:58,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  25%|##4       | 27/110 [00:19<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  25%|##5       | 28/110 [00:19<00:57,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:  26%|##6       | 29/110 [00:20<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  27%|##7       | 30/110 [00:21<00:53,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  28%|##8       | 31/110 [00:21<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  29%|##9       | 32/110 [00:22<00:52,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  30%|###       | 33/110 [00:23<00:50,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  31%|###       | 34/110 [00:23<00:48,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  32%|###1      | 35/110 [00:24<00:47,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  33%|###2      | 36/110 [00:24<00:48,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  34%|###3      | 37/110 [00:25<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  35%|###4      | 38/110 [00:26<00:47,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  35%|###5      | 39/110 [00:26<00:46,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  36%|###6      | 40/110 [00:27<00:45,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  37%|###7      | 41/110 [00:28<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  38%|###8      | 42/110 [00:28<00:44,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  39%|###9      | 43/110 [00:29<00:42,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0325 ||:  40%|####      | 44/110 [00:30<00:42,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0325 ||:  41%|####      | 45/110 [00:30<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0327 ||:  42%|####1     | 46/110 [00:31<00:40,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0326 ||:  43%|####2     | 47/110 [00:32<00:41,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  44%|####3     | 48/110 [00:32<00:40,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  45%|####4     | 49/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  45%|####5     | 50/110 [00:33<00:39,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0325 ||:  46%|####6     | 51/110 [00:34<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  47%|####7     | 52/110 [00:35<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  48%|####8     | 53/110 [00:36<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  49%|####9     | 54/110 [00:36<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  50%|#####     | 55/110 [00:37<00:36,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  51%|#####     | 56/110 [00:38<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  56%|#####6    | 62/110 [00:41<00:31,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  57%|#####7    | 63/110 [00:42<00:30,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  58%|#####8    | 64/110 [00:43<00:29,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  60%|######    | 66/110 [00:44<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  61%|######    | 67/110 [00:45<00:31,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0313 ||:  62%|######1   | 68/110 [00:46<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  63%|######2   | 69/110 [00:46<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  64%|######3   | 70/110 [00:48<00:38,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  65%|######4   | 71/110 [00:49<00:34,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  65%|######5   | 72/110 [00:49<00:31,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  66%|######6   | 73/110 [00:50<00:30,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  67%|######7   | 74/110 [00:51<00:29,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  68%|######8   | 75/110 [00:52<00:26,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  69%|######9   | 76/110 [00:52<00:24,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  70%|#######   | 77/110 [00:53<00:23,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  71%|#######   | 78/110 [00:54<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  73%|#######2  | 80/110 [00:55<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  74%|#######3  | 81/110 [00:56<00:19,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  76%|#######6  | 84/110 [00:58<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  77%|#######7  | 85/110 [00:58<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  78%|#######8  | 86/110 [00:59<00:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  79%|#######9  | 87/110 [01:00<00:17,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  80%|########  | 88/110 [01:01<00:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  81%|########  | 89/110 [01:01<00:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  82%|########1 | 90/110 [01:02<00:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0314 ||:  83%|########2 | 91/110 [01:03<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0313 ||:  84%|########3 | 92/110 [01:03<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  85%|########4 | 93/110 [01:04<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  85%|########5 | 94/110 [01:05<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  86%|########6 | 95/110 [01:05<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  87%|########7 | 96/110 [01:06<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  88%|########8 | 97/110 [01:07<00:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  89%|########9 | 98/110 [01:07<00:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  90%|######### | 99/110 [01:08<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  91%|######### | 100/110 [01:09<00:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  92%|#########1| 101/110 [01:09<00:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  93%|#########2| 102/110 [01:10<00:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  94%|#########3| 103/110 [01:11<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  95%|#########4| 104/110 [01:11<00:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  95%|#########5| 105/110 [01:12<00:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  96%|#########6| 106/110 [01:13<00:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  97%|#########7| 107/110 [01:13<00:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  98%|#########8| 108/110 [01:14<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||:  99%|#########9| 109/110 [01:15<00:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||: 100%|##########| 110/110 [01:15<00:00,  1.74it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0323 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.5938, no_result: 0.1250, loss: 1.1190 ||:   4%|4         | 1/24 [00:00<00:08,  2.69it/s]
+BLEU: 0.6997, acc: 0.6562, no_result: 0.1094, loss: 1.0596 ||:   8%|8         | 2/24 [00:00<00:07,  2.76it/s]
+BLEU: 0.6763, acc: 0.6354, no_result: 0.1354, loss: 1.2064 ||:  12%|#2        | 3/24 [00:01<00:07,  2.78it/s]
+BLEU: 0.7014, acc: 0.6484, no_result: 0.1250, loss: 1.1275 ||:  17%|#6        | 4/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7290, acc: 0.6875, no_result: 0.1187, loss: 1.0881 ||:  21%|##        | 5/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7270, acc: 0.6719, no_result: 0.1198, loss: 1.0943 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7308, acc: 0.6830, no_result: 0.1116, loss: 1.0682 ||:  29%|##9       | 7/24 [00:02<00:05,  2.88it/s]
+BLEU: 0.7301, acc: 0.6914, no_result: 0.1094, loss: 1.0975 ||:  33%|###3      | 8/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7223, acc: 0.6979, no_result: 0.1146, loss: 1.0934 ||:  38%|###7      | 9/24 [00:03<00:05,  2.86it/s]
+BLEU: 0.7325, acc: 0.6875, no_result: 0.1094, loss: 1.1101 ||:  42%|####1     | 10/24 [00:03<00:05,  2.79it/s]
+BLEU: 0.7323, acc: 0.6733, no_result: 0.1165, loss: 1.1575 ||:  46%|####5     | 11/24 [00:03<00:04,  2.67it/s]
+BLEU: 0.7282, acc: 0.6797, no_result: 0.1094, loss: 1.1724 ||:  50%|#####     | 12/24 [00:04<00:04,  2.76it/s]
+BLEU: 0.7273, acc: 0.6779, no_result: 0.1154, loss: 1.1951 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7269, acc: 0.6652, no_result: 0.1161, loss: 1.2467 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.57it/s]
+BLEU: 0.7141, acc: 0.6458, no_result: 0.1375, loss: 1.3059 ||:  62%|######2   | 15/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7121, acc: 0.6406, no_result: 0.1387, loss: 1.3316 ||:  67%|######6   | 16/24 [00:06<00:03,  2.34it/s]
+BLEU: 0.7104, acc: 0.6342, no_result: 0.1452, loss: 1.3207 ||:  71%|#######   | 17/24 [00:06<00:02,  2.39it/s]
+BLEU: 0.7075, acc: 0.6354, no_result: 0.1476, loss: 1.3311 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.39it/s]
+BLEU: 0.7093, acc: 0.6382, no_result: 0.1431, loss: 1.3112 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.49it/s]
+BLEU: 0.7118, acc: 0.6422, no_result: 0.1391, loss: 1.2985 ||:  83%|########3 | 20/24 [00:07<00:01,  2.63it/s]
+BLEU: 0.7071, acc: 0.6354, no_result: 0.1339, loss: 1.3023 ||:  88%|########7 | 21/24 [00:07<00:01,  2.63it/s]
+BLEU: 0.7118, acc: 0.6278, no_result: 0.1307, loss: 1.2985 ||:  92%|#########1| 22/24 [00:08<00:00,  2.61it/s]
+BLEU: 0.7154, acc: 0.6359, no_result: 0.1264, loss: 1.2750 ||:  96%|#########5| 23/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.7154, acc: 0.6325, no_result: 0.1257, loss: 1.2910 ||: 100%|##########| 24/24 [00:08<00:00,  3.34it/s]
+BLEU: 0.7154, acc: 0.6325, no_result: 0.1257, loss: 1.2910 ||: 100%|##########| 24/24 [00:08<00:00,  2.71it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:   1%|          | 1/110 [00:00<01:27,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:   2%|1         | 2/110 [00:01<01:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:   3%|2         | 3/110 [00:02<01:13,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:   4%|3         | 4/110 [00:02<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:   5%|4         | 5/110 [00:03<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:   6%|6         | 7/110 [00:04<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:   7%|7         | 8/110 [00:05<01:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:   8%|8         | 9/110 [00:06<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:   9%|9         | 10/110 [00:06<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  10%|#         | 11/110 [00:07<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  11%|#         | 12/110 [00:08<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  13%|#2        | 14/110 [00:09<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  15%|#4        | 16/110 [00:11<01:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  15%|#5        | 17/110 [00:11<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  16%|#6        | 18/110 [00:12<01:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  17%|#7        | 19/110 [00:13<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  18%|#8        | 20/110 [00:13<01:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  19%|#9        | 21/110 [00:14<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  20%|##        | 22/110 [00:15<00:59,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  21%|##        | 23/110 [00:15<00:57,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  22%|##1       | 24/110 [00:16<01:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  23%|##2       | 25/110 [00:17<00:58,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  24%|##3       | 26/110 [00:17<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  25%|##4       | 27/110 [00:18<00:55,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  25%|##5       | 28/110 [00:19<00:53,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  26%|##6       | 29/110 [00:19<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  27%|##7       | 30/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  28%|##8       | 31/110 [00:21<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  29%|##9       | 32/110 [00:22<00:58,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  30%|###       | 33/110 [00:22<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  31%|###       | 34/110 [00:23<00:55,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  32%|###1      | 35/110 [00:24<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  33%|###2      | 36/110 [00:24<00:51,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  34%|###3      | 37/110 [00:25<00:49,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  35%|###4      | 38/110 [00:26<00:50,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  35%|###5      | 39/110 [00:27<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0296 ||:  36%|###6      | 40/110 [00:27<00:49,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  37%|###7      | 41/110 [00:28<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0296 ||:  38%|###8      | 42/110 [00:29<00:49,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  39%|###9      | 43/110 [00:29<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  40%|####      | 44/110 [00:30<00:48,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  41%|####      | 45/110 [00:31<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  42%|####1     | 46/110 [00:31<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  43%|####2     | 47/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  44%|####3     | 48/110 [00:33<00:42,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  45%|####4     | 49/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  45%|####5     | 50/110 [00:34<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  46%|####6     | 51/110 [00:35<00:38,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  47%|####7     | 52/110 [00:35<00:36,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  48%|####8     | 53/110 [00:36<00:35,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  49%|####9     | 54/110 [00:37<00:35,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  50%|#####     | 55/110 [00:37<00:36,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  51%|#####     | 56/110 [00:38<00:35,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  52%|#####1    | 57/110 [00:39<00:34,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  53%|#####2    | 58/110 [00:39<00:33,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  54%|#####3    | 59/110 [00:40<00:32,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  55%|#####4    | 60/110 [00:41<00:47,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  55%|#####5    | 61/110 [00:42<00:42,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  56%|#####6    | 62/110 [00:43<00:39,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  57%|#####7    | 63/110 [00:44<00:36,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  58%|#####8    | 64/110 [00:44<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  59%|#####9    | 65/110 [00:45<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  60%|######    | 66/110 [00:46<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  61%|######    | 67/110 [00:46<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  62%|######1   | 68/110 [00:47<00:28,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  63%|######2   | 69/110 [00:47<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  64%|######3   | 70/110 [00:48<00:26,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  65%|######4   | 71/110 [00:49<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  65%|######5   | 72/110 [00:49<00:25,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  66%|######6   | 73/110 [00:50<00:26,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  67%|######7   | 74/110 [00:51<00:26,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||:  68%|######8   | 75/110 [00:52<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||:  69%|######9   | 76/110 [00:53<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  70%|#######   | 77/110 [00:53<00:24,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:  71%|#######   | 78/110 [00:54<00:22,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  72%|#######1  | 79/110 [00:54<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  73%|#######2  | 80/110 [00:55<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:  74%|#######3  | 81/110 [00:56<00:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  75%|#######4  | 82/110 [00:57<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:  76%|#######6  | 84/110 [00:58<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  77%|#######7  | 85/110 [00:59<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:  78%|#######8  | 86/110 [00:59<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0310 ||:  79%|#######9  | 87/110 [01:00<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  80%|########  | 88/110 [01:01<00:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  81%|########  | 89/110 [01:01<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  82%|########1 | 90/110 [01:02<00:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0309 ||:  83%|########2 | 91/110 [01:03<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:  84%|########3 | 92/110 [01:03<00:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:  85%|########4 | 93/110 [01:04<00:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  85%|########5 | 94/110 [01:05<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0314 ||:  86%|########6 | 95/110 [01:05<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0314 ||:  87%|########7 | 96/110 [01:06<00:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  88%|########8 | 97/110 [01:07<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  89%|########9 | 98/110 [01:08<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  90%|######### | 99/110 [01:08<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0317 ||:  91%|######### | 100/110 [01:09<00:06,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0318 ||:  92%|#########1| 101/110 [01:09<00:05,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0319 ||:  93%|#########2| 102/110 [01:10<00:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  94%|#########3| 103/110 [01:11<00:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:  95%|#########4| 104/110 [01:11<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  95%|#########5| 105/110 [01:12<00:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  96%|#########6| 106/110 [01:13<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  97%|#########7| 107/110 [01:14<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  98%|#########8| 108/110 [01:14<00:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0321 ||:  99%|#########9| 109/110 [01:15<00:00,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||: 100%|##########| 110/110 [01:15<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||: 100%|##########| 110/110 [01:15<00:00,  1.45it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6562, no_result: 0.0625, loss: 1.1265 ||:   4%|4         | 1/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.6825, acc: 0.6719, no_result: 0.0781, loss: 1.0911 ||:   8%|8         | 2/24 [00:00<00:07,  2.82it/s]
+BLEU: 0.6746, acc: 0.6458, no_result: 0.1146, loss: 1.2151 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.6981, acc: 0.6562, no_result: 0.1094, loss: 1.1348 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7234, acc: 0.6875, no_result: 0.1062, loss: 1.0979 ||:  21%|##        | 5/24 [00:01<00:06,  2.72it/s]
+BLEU: 0.7202, acc: 0.6771, no_result: 0.0990, loss: 1.1004 ||:  25%|##5       | 6/24 [00:02<00:06,  2.82it/s]
+BLEU: 0.7247, acc: 0.6875, no_result: 0.0938, loss: 1.0768 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7246, acc: 0.6992, no_result: 0.0898, loss: 1.1070 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7103, acc: 0.7014, no_result: 0.0938, loss: 1.1032 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7211, acc: 0.6906, no_result: 0.0906, loss: 1.1161 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7183, acc: 0.6847, no_result: 0.0938, loss: 1.1667 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7149, acc: 0.6875, no_result: 0.0885, loss: 1.1798 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7151, acc: 0.6851, no_result: 0.0938, loss: 1.2008 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7140, acc: 0.6719, no_result: 0.0915, loss: 1.2508 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.7014, acc: 0.6542, no_result: 0.1125, loss: 1.3054 ||:  62%|######2   | 15/24 [00:05<00:03,  2.45it/s]
+BLEU: 0.7000, acc: 0.6484, no_result: 0.1152, loss: 1.3288 ||:  67%|######6   | 16/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.6992, acc: 0.6415, no_result: 0.1213, loss: 1.3176 ||:  71%|#######   | 17/24 [00:06<00:02,  2.48it/s]
+BLEU: 0.6969, acc: 0.6406, no_result: 0.1233, loss: 1.3275 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.6991, acc: 0.6447, no_result: 0.1201, loss: 1.3080 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7007, acc: 0.6484, no_result: 0.1172, loss: 1.2953 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.6970, acc: 0.6414, no_result: 0.1146, loss: 1.3001 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7026, acc: 0.6364, no_result: 0.1122, loss: 1.2966 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7060, acc: 0.6440, no_result: 0.1087, loss: 1.2747 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7061, acc: 0.6450, no_result: 0.1042, loss: 1.2931 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7061, acc: 0.6450, no_result: 0.1042, loss: 1.2931 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:   1%|          | 1/110 [00:00<01:34,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:   2%|1         | 2/110 [00:01<01:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:   3%|2         | 3/110 [00:02<01:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:   4%|3         | 4/110 [00:02<01:16,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:   5%|4         | 5/110 [00:03<01:16,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0342 ||:   5%|5         | 6/110 [00:04<01:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0355 ||:   6%|6         | 7/110 [00:05<01:16,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:   7%|7         | 8/110 [00:05<01:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0350 ||:   8%|8         | 9/110 [00:06<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0341 ||:   9%|9         | 10/110 [00:07<01:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0361 ||:  10%|#         | 11/110 [00:07<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0348 ||:  11%|#         | 12/110 [00:08<01:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:  12%|#1        | 13/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0339 ||:  13%|#2        | 14/110 [00:09<01:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0335 ||:  14%|#3        | 15/110 [00:10<01:08,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0324 ||:  15%|#4        | 16/110 [00:11<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0320 ||:  15%|#5        | 17/110 [00:12<01:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0316 ||:  16%|#6        | 18/110 [00:12<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0315 ||:  17%|#7        | 19/110 [00:13<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  18%|#8        | 20/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  19%|#9        | 21/110 [00:14<00:59,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  20%|##        | 22/110 [00:15<01:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0306 ||:  21%|##        | 23/110 [00:16<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  22%|##1       | 24/110 [00:16<01:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  23%|##2       | 25/110 [00:17<00:58,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  24%|##3       | 26/110 [00:18<00:56,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  25%|##4       | 27/110 [00:18<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  25%|##5       | 28/110 [00:19<00:54,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  26%|##6       | 29/110 [00:20<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  27%|##7       | 30/110 [00:20<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||:  28%|##8       | 31/110 [00:21<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0306 ||:  29%|##9       | 32/110 [00:22<00:50,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0306 ||:  30%|###       | 33/110 [00:22<00:49,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||:  31%|###       | 34/110 [00:23<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  32%|###1      | 35/110 [00:24<00:53,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  33%|###2      | 36/110 [00:25<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  34%|###3      | 37/110 [00:25<00:55,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0311 ||:  35%|###4      | 38/110 [00:26<00:53,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:  35%|###5      | 39/110 [00:27<00:51,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0304 ||:  36%|###6      | 40/110 [00:27<00:49,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  37%|###7      | 41/110 [00:28<00:48,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  38%|###8      | 42/110 [00:29<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  39%|###9      | 43/110 [00:30<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  40%|####      | 44/110 [00:30<00:47,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  41%|####      | 45/110 [00:31<00:51,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  42%|####1     | 46/110 [00:32<00:47,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  43%|####2     | 47/110 [00:33<00:45,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  44%|####3     | 48/110 [00:33<00:46,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  45%|####4     | 49/110 [00:34<00:42,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  45%|####5     | 50/110 [00:35<00:55,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  46%|####6     | 51/110 [00:36<00:50,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  47%|####7     | 52/110 [00:37<00:45,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  48%|####8     | 53/110 [00:37<00:41,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  49%|####9     | 54/110 [00:38<00:40,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  50%|#####     | 55/110 [00:39<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  51%|#####     | 56/110 [00:39<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  52%|#####1    | 57/110 [00:40<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  53%|#####2    | 58/110 [00:41<00:39,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  54%|#####3    | 59/110 [00:42<00:36,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  55%|#####4    | 60/110 [00:42<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  55%|#####5    | 61/110 [00:43<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  56%|#####6    | 62/110 [00:44<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  57%|#####7    | 63/110 [00:44<00:30,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  58%|#####8    | 64/110 [00:45<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  59%|#####9    | 65/110 [00:46<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  60%|######    | 66/110 [00:46<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  61%|######    | 67/110 [00:47<00:30,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  62%|######1   | 68/110 [00:48<00:28,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  63%|######2   | 69/110 [00:48<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  64%|######3   | 70/110 [00:49<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  65%|######4   | 71/110 [00:50<00:25,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  65%|######5   | 72/110 [00:50<00:24,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  66%|######6   | 73/110 [00:51<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  67%|######7   | 74/110 [00:52<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  68%|######8   | 75/110 [00:53<00:26,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  69%|######9   | 76/110 [00:53<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  70%|#######   | 77/110 [00:54<00:23,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:  71%|#######   | 78/110 [00:54<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:  72%|#######1  | 79/110 [00:55<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  73%|#######2  | 80/110 [00:56<00:20,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  74%|#######3  | 81/110 [00:57<00:19,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  75%|#######4  | 82/110 [00:57<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  75%|#######5  | 83/110 [00:58<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  76%|#######6  | 84/110 [00:59<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  77%|#######7  | 85/110 [00:59<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  78%|#######8  | 86/110 [01:00<00:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  79%|#######9  | 87/110 [01:01<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  80%|########  | 88/110 [01:01<00:15,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  81%|########  | 89/110 [01:02<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  82%|########1 | 90/110 [01:03<00:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  83%|########2 | 91/110 [01:03<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  84%|########3 | 92/110 [01:04<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  85%|########4 | 93/110 [01:05<00:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  85%|########5 | 94/110 [01:05<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  86%|########6 | 95/110 [01:06<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  87%|########7 | 96/110 [01:07<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  88%|########8 | 97/110 [01:07<00:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  89%|########9 | 98/110 [01:08<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  90%|######### | 99/110 [01:09<00:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  91%|######### | 100/110 [01:09<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  92%|#########1| 101/110 [01:10<00:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  93%|#########2| 102/110 [01:11<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  94%|#########3| 103/110 [01:11<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0299 ||:  95%|#########4| 104/110 [01:12<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  95%|#########5| 105/110 [01:13<00:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0301 ||:  96%|#########6| 106/110 [01:14<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:  97%|#########7| 107/110 [01:14<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  98%|#########8| 108/110 [01:15<00:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0303 ||:  99%|#########9| 109/110 [01:15<00:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||: 100%|##########| 110/110 [01:16<00:00,  1.80it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0305 ||: 100%|##########| 110/110 [01:16<00:00,  1.44it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6562, no_result: 0.0938, loss: 1.2257 ||:   4%|4         | 1/24 [00:00<00:08,  2.61it/s]
+BLEU: 0.7148, acc: 0.7031, no_result: 0.0938, loss: 1.1368 ||:   8%|8         | 2/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.7000, acc: 0.6562, no_result: 0.1146, loss: 1.2416 ||:  12%|#2        | 3/24 [00:01<00:07,  2.78it/s]
+BLEU: 0.7214, acc: 0.6641, no_result: 0.1172, loss: 1.1525 ||:  17%|#6        | 4/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7418, acc: 0.6875, no_result: 0.1187, loss: 1.1155 ||:  21%|##        | 5/24 [00:01<00:07,  2.64it/s]
+BLEU: 0.7416, acc: 0.6771, no_result: 0.1094, loss: 1.1157 ||:  25%|##5       | 6/24 [00:02<00:06,  2.75it/s]
+BLEU: 0.7440, acc: 0.6875, no_result: 0.1027, loss: 1.0998 ||:  29%|##9       | 7/24 [00:02<00:05,  2.88it/s]
+BLEU: 0.7434, acc: 0.6953, no_result: 0.1016, loss: 1.1223 ||:  33%|###3      | 8/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7323, acc: 0.7083, no_result: 0.1007, loss: 1.1134 ||:  38%|###7      | 9/24 [00:03<00:05,  2.85it/s]
+BLEU: 0.7423, acc: 0.6937, no_result: 0.1000, loss: 1.1283 ||:  42%|####1     | 10/24 [00:03<00:05,  2.79it/s]
+BLEU: 0.7393, acc: 0.6875, no_result: 0.1023, loss: 1.1797 ||:  46%|####5     | 11/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7351, acc: 0.6901, no_result: 0.0990, loss: 1.1937 ||:  50%|#####     | 12/24 [00:04<00:04,  2.76it/s]
+BLEU: 0.7335, acc: 0.6899, no_result: 0.1058, loss: 1.2128 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7328, acc: 0.6786, no_result: 0.1049, loss: 1.2585 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.7198, acc: 0.6604, no_result: 0.1250, loss: 1.3151 ||:  62%|######2   | 15/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7164, acc: 0.6543, no_result: 0.1270, loss: 1.3389 ||:  67%|######6   | 16/24 [00:06<00:03,  2.36it/s]
+BLEU: 0.7139, acc: 0.6471, no_result: 0.1324, loss: 1.3288 ||:  71%|#######   | 17/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7118, acc: 0.6441, no_result: 0.1372, loss: 1.3369 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.38it/s]
+BLEU: 0.7135, acc: 0.6480, no_result: 0.1332, loss: 1.3182 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.47it/s]
+BLEU: 0.7158, acc: 0.6516, no_result: 0.1297, loss: 1.3067 ||:  83%|########3 | 20/24 [00:07<00:01,  2.62it/s]
+BLEU: 0.7144, acc: 0.6488, no_result: 0.1250, loss: 1.3106 ||:  88%|########7 | 21/24 [00:08<00:01,  2.59it/s]
+BLEU: 0.7194, acc: 0.6449, no_result: 0.1222, loss: 1.3058 ||:  92%|#########1| 22/24 [00:08<00:00,  2.59it/s]
+BLEU: 0.7230, acc: 0.6495, no_result: 0.1196, loss: 1.2830 ||:  96%|#########5| 23/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.7230, acc: 0.6502, no_result: 0.1192, loss: 1.3043 ||: 100%|##########| 24/24 [00:08<00:00,  3.31it/s]
+BLEU: 0.7230, acc: 0.6502, no_result: 0.1192, loss: 1.3043 ||: 100%|##########| 24/24 [00:08<00:00,  2.70it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:   1%|          | 1/110 [00:00<01:15,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:   2%|1         | 2/110 [00:01<01:07,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:   3%|2         | 3/110 [00:02<01:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:   4%|3         | 4/110 [00:02<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:   5%|4         | 5/110 [00:03<01:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:   5%|5         | 6/110 [00:03<01:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:   6%|6         | 7/110 [00:04<01:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:   8%|8         | 9/110 [00:05<01:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  10%|#         | 11/110 [00:07<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  11%|#         | 12/110 [00:07<01:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:  12%|#1        | 13/110 [00:08<01:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  13%|#2        | 14/110 [00:09<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  14%|#3        | 15/110 [00:09<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  15%|#4        | 16/110 [00:10<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0274 ||:  15%|#5        | 17/110 [00:11<01:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  16%|#6        | 18/110 [00:11<00:59,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  17%|#7        | 19/110 [00:12<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  19%|#9        | 21/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  20%|##        | 22/110 [00:14<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  21%|##        | 23/110 [00:15<00:55,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0272 ||:  22%|##1       | 24/110 [00:15<00:55,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0272 ||:  23%|##2       | 25/110 [00:16<00:55,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  24%|##3       | 26/110 [00:17<00:53,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  25%|##4       | 27/110 [00:17<00:55,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  25%|##5       | 28/110 [00:18<00:53,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  26%|##6       | 29/110 [00:19<00:51,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  27%|##7       | 30/110 [00:19<00:50,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  28%|##8       | 31/110 [00:20<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  29%|##9       | 32/110 [00:21<00:51,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  30%|###       | 33/110 [00:21<00:50,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  31%|###       | 34/110 [00:22<00:49,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  32%|###1      | 35/110 [00:22<00:49,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  33%|###2      | 36/110 [00:23<00:48,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  34%|###3      | 37/110 [00:24<00:49,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  35%|###4      | 38/110 [00:25<00:48,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  35%|###5      | 39/110 [00:25<00:48,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  36%|###6      | 40/110 [00:27<01:04,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  37%|###7      | 41/110 [00:27<01:00,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  38%|###8      | 42/110 [00:28<00:54,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  39%|###9      | 43/110 [00:29<00:50,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  40%|####      | 44/110 [00:29<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  41%|####      | 45/110 [00:30<00:44,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  42%|####1     | 46/110 [00:31<00:45,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  43%|####2     | 47/110 [00:31<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  44%|####3     | 48/110 [00:32<00:41,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  45%|####4     | 49/110 [00:33<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  45%|####5     | 50/110 [00:33<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  46%|####6     | 51/110 [00:34<00:40,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  47%|####7     | 52/110 [00:35<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  48%|####8     | 53/110 [00:35<00:38,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  49%|####9     | 54/110 [00:36<00:38,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  50%|#####     | 55/110 [00:37<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  51%|#####     | 56/110 [00:38<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  52%|#####1    | 57/110 [00:38<00:37,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0272 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  60%|######    | 66/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  61%|######    | 67/110 [00:45<00:32,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:  62%|######1   | 68/110 [00:46<00:31,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  63%|######2   | 69/110 [00:47<00:30,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  64%|######3   | 70/110 [00:47<00:28,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  65%|######4   | 71/110 [00:48<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  65%|######5   | 72/110 [00:49<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  66%|######6   | 73/110 [00:49<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  67%|######7   | 74/110 [00:50<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  68%|######8   | 75/110 [00:51<00:23,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  69%|######9   | 76/110 [00:51<00:21,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  70%|#######   | 77/110 [00:52<00:20,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  71%|#######   | 78/110 [00:52<00:19,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  72%|#######1  | 79/110 [00:53<00:18,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  73%|#######2  | 80/110 [00:54<00:18,  1.67it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  76%|#######6  | 84/110 [00:57<00:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  77%|#######7  | 85/110 [00:57<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  80%|########  | 88/110 [00:59<00:13,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  81%|########  | 89/110 [01:00<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  82%|########1 | 90/110 [01:01<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  83%|########2 | 91/110 [01:01<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  84%|########3 | 92/110 [01:02<00:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  85%|########4 | 93/110 [01:03<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  85%|########5 | 94/110 [01:04<00:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  86%|########6 | 95/110 [01:04<00:10,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  87%|########7 | 96/110 [01:05<00:10,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  88%|########8 | 97/110 [01:06<00:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  89%|########9 | 98/110 [01:06<00:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  90%|######### | 99/110 [01:07<00:08,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  91%|######### | 100/110 [01:08<00:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  92%|#########1| 101/110 [01:09<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  93%|#########2| 102/110 [01:09<00:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  94%|#########3| 103/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  95%|#########4| 104/110 [01:11<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  95%|#########5| 105/110 [01:11<00:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  96%|#########6| 106/110 [01:12<00:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0296 ||:  97%|#########7| 107/110 [01:13<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  98%|#########8| 108/110 [01:13<00:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  99%|#########9| 109/110 [01:14<00:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||: 100%|##########| 110/110 [01:14<00:00,  1.69it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7126, acc: 0.7188, no_result: 0.0625, loss: 1.2210 ||:   4%|4         | 1/24 [00:00<00:08,  2.65it/s]
+BLEU: 0.6909, acc: 0.7188, no_result: 0.0781, loss: 1.1211 ||:   8%|8         | 2/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.6847, acc: 0.6771, no_result: 0.1042, loss: 1.2369 ||:  12%|#2        | 3/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.7102, acc: 0.6797, no_result: 0.1094, loss: 1.1522 ||:  17%|#6        | 4/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7316, acc: 0.7063, no_result: 0.1125, loss: 1.1120 ||:  21%|##        | 5/24 [00:01<00:07,  2.64it/s]
+BLEU: 0.7342, acc: 0.6927, no_result: 0.1042, loss: 1.1114 ||:  25%|##5       | 6/24 [00:02<00:06,  2.73it/s]
+BLEU: 0.7403, acc: 0.7009, no_result: 0.0982, loss: 1.0930 ||:  29%|##9       | 7/24 [00:02<00:05,  2.85it/s]
+BLEU: 0.7400, acc: 0.7070, no_result: 0.0977, loss: 1.1219 ||:  33%|###3      | 8/24 [00:02<00:05,  2.89it/s]
+BLEU: 0.7304, acc: 0.7153, no_result: 0.0972, loss: 1.1175 ||:  38%|###7      | 9/24 [00:03<00:05,  2.81it/s]
+BLEU: 0.7409, acc: 0.7031, no_result: 0.0969, loss: 1.1349 ||:  42%|####1     | 10/24 [00:03<00:05,  2.74it/s]
+BLEU: 0.7400, acc: 0.6847, no_result: 0.1051, loss: 1.1867 ||:  46%|####5     | 11/24 [00:04<00:04,  2.62it/s]
+BLEU: 0.7331, acc: 0.6823, no_result: 0.1016, loss: 1.1962 ||:  50%|#####     | 12/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7273, acc: 0.6851, no_result: 0.1058, loss: 1.2201 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.63it/s]
+BLEU: 0.7271, acc: 0.6696, no_result: 0.1071, loss: 1.2726 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.45it/s]
+BLEU: 0.7121, acc: 0.6500, no_result: 0.1292, loss: 1.3291 ||:  62%|######2   | 15/24 [00:05<00:04,  2.19it/s]
+BLEU: 0.7099, acc: 0.6426, no_result: 0.1309, loss: 1.3546 ||:  67%|######6   | 16/24 [00:06<00:03,  2.12it/s]
+BLEU: 0.7060, acc: 0.6342, no_result: 0.1397, loss: 1.3448 ||:  71%|#######   | 17/24 [00:06<00:03,  2.14it/s]
+BLEU: 0.7031, acc: 0.6319, no_result: 0.1441, loss: 1.3544 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.11it/s]
+BLEU: 0.7054, acc: 0.6365, no_result: 0.1398, loss: 1.3367 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.20it/s]
+BLEU: 0.7083, acc: 0.6422, no_result: 0.1344, loss: 1.3250 ||:  83%|########3 | 20/24 [00:08<00:01,  2.33it/s]
+BLEU: 0.7047, acc: 0.6369, no_result: 0.1295, loss: 1.3302 ||:  88%|########7 | 21/24 [00:08<00:01,  2.32it/s]
+BLEU: 0.7101, acc: 0.6335, no_result: 0.1264, loss: 1.3235 ||:  92%|#########1| 22/24 [00:08<00:00,  2.30it/s]
+BLEU: 0.7132, acc: 0.6399, no_result: 0.1223, loss: 1.2999 ||:  96%|#########5| 23/24 [00:09<00:00,  2.37it/s]
+BLEU: 0.7132, acc: 0.6364, no_result: 0.1218, loss: 1.3180 ||: 100%|##########| 24/24 [00:09<00:00,  2.91it/s]
+BLEU: 0.7132, acc: 0.6364, no_result: 0.1218, loss: 1.3180 ||: 100%|##########| 24/24 [00:09<00:00,  2.52it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:   1%|          | 1/110 [00:00<01:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0376 ||:   2%|1         | 2/110 [00:01<01:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0322 ||:   3%|2         | 3/110 [00:01<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:   4%|3         | 4/110 [00:02<01:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:   5%|4         | 5/110 [00:03<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:   5%|5         | 6/110 [00:04<01:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:   6%|6         | 7/110 [00:04<01:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:   7%|7         | 8/110 [00:05<01:15,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:   8%|8         | 9/110 [00:06<01:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  10%|#         | 11/110 [00:07<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  11%|#         | 12/110 [00:08<01:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  12%|#1        | 13/110 [00:08<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  13%|#2        | 14/110 [00:09<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0238 ||:  14%|#3        | 15/110 [00:10<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0255 ||:  15%|#4        | 16/110 [00:10<01:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  15%|#5        | 17/110 [00:11<00:59,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  16%|#6        | 18/110 [00:12<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0297 ||:  17%|#7        | 19/110 [00:12<00:59,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  18%|#8        | 20/110 [00:13<00:57,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  19%|#9        | 21/110 [00:14<00:57,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0300 ||:  20%|##        | 22/110 [00:14<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0298 ||:  21%|##        | 23/110 [00:15<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  22%|##1       | 24/110 [00:16<00:57,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  23%|##2       | 25/110 [00:16<00:54,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  24%|##3       | 26/110 [00:17<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  25%|##4       | 27/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  25%|##5       | 28/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  26%|##6       | 29/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  27%|##7       | 30/110 [00:20<01:13,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  28%|##8       | 31/110 [00:21<01:05,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  29%|##9       | 32/110 [00:22<01:03,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  30%|###       | 33/110 [00:23<01:00,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  31%|###       | 34/110 [00:23<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  32%|###1      | 35/110 [00:24<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0290 ||:  33%|###2      | 36/110 [00:24<00:51,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  34%|###3      | 37/110 [00:25<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  35%|###4      | 38/110 [00:26<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  35%|###5      | 39/110 [00:27<00:53,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  36%|###6      | 40/110 [00:27<00:50,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  37%|###7      | 41/110 [00:28<00:48,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  38%|###8      | 42/110 [00:29<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  39%|###9      | 43/110 [00:29<00:43,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  40%|####      | 44/110 [00:30<00:44,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  41%|####      | 45/110 [00:31<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  42%|####1     | 46/110 [00:31<00:41,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  43%|####2     | 47/110 [00:32<00:41,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  44%|####3     | 48/110 [00:32<00:39,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  45%|####4     | 49/110 [00:33<00:38,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  45%|####5     | 50/110 [00:34<00:37,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  46%|####6     | 51/110 [00:34<00:36,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  47%|####7     | 52/110 [00:35<00:37,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  48%|####8     | 53/110 [00:36<00:36,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  49%|####9     | 54/110 [00:36<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  50%|#####     | 55/110 [00:37<00:36,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  51%|#####     | 56/110 [00:38<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  52%|#####1    | 57/110 [00:38<00:34,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  53%|#####2    | 58/110 [00:39<00:33,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  55%|#####4    | 60/110 [00:40<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  56%|#####6    | 62/110 [00:42<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0291 ||:  60%|######    | 66/110 [00:44<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  61%|######    | 67/110 [00:45<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  63%|######2   | 69/110 [00:47<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  64%|######3   | 70/110 [00:47<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  65%|######4   | 71/110 [00:48<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  65%|######5   | 72/110 [00:49<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  66%|######6   | 73/110 [00:49<00:24,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  67%|######7   | 74/110 [00:50<00:23,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0289 ||:  68%|######8   | 75/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  69%|######9   | 76/110 [00:51<00:24,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0288 ||:  70%|#######   | 77/110 [00:52<00:23,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  71%|#######   | 78/110 [00:53<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  72%|#######1  | 79/110 [00:53<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  80%|########  | 88/110 [00:59<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  81%|########  | 89/110 [01:00<00:14,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  82%|########1 | 90/110 [01:01<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  83%|########2 | 91/110 [01:01<00:12,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  84%|########3 | 92/110 [01:02<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  85%|########4 | 93/110 [01:03<00:10,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  85%|########5 | 94/110 [01:03<00:10,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  86%|########6 | 95/110 [01:04<00:09,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  87%|########7 | 96/110 [01:04<00:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  88%|########8 | 97/110 [01:05<00:08,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  89%|########9 | 98/110 [01:06<00:07,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0292 ||:  90%|######### | 99/110 [01:06<00:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  91%|######### | 100/110 [01:07<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  92%|#########1| 101/110 [01:08<00:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  93%|#########2| 102/110 [01:08<00:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  94%|#########3| 103/110 [01:09<00:04,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||:  95%|#########4| 104/110 [01:10<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  95%|#########5| 105/110 [01:10<00:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0295 ||:  96%|#########6| 106/110 [01:11<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  97%|#########7| 107/110 [01:12<00:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0296 ||:  98%|#########8| 108/110 [01:13<00:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0294 ||:  99%|#########9| 109/110 [01:13<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||: 100%|##########| 110/110 [01:13<00:00,  1.74it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0293 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6875, no_result: 0.0625, loss: 1.1884 ||:   4%|4         | 1/24 [00:00<00:08,  2.72it/s]
+BLEU: 0.6875, acc: 0.7031, no_result: 0.0781, loss: 1.1203 ||:   8%|8         | 2/24 [00:00<00:07,  2.80it/s]
+BLEU: 0.6819, acc: 0.6667, no_result: 0.1042, loss: 1.2506 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.7082, acc: 0.6641, no_result: 0.1172, loss: 1.1720 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7357, acc: 0.6937, no_result: 0.1125, loss: 1.1197 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7334, acc: 0.6667, no_result: 0.1042, loss: 1.1213 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7427, acc: 0.6786, no_result: 0.0982, loss: 1.1003 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7421, acc: 0.6836, no_result: 0.1016, loss: 1.1277 ||:  33%|###3      | 8/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7337, acc: 0.6944, no_result: 0.1007, loss: 1.1214 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7426, acc: 0.6781, no_result: 0.1031, loss: 1.1413 ||:  42%|####1     | 10/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.7402, acc: 0.6619, no_result: 0.1108, loss: 1.1941 ||:  46%|####5     | 11/24 [00:03<00:04,  2.69it/s]
+BLEU: 0.7334, acc: 0.6693, no_result: 0.1042, loss: 1.2067 ||:  50%|#####     | 12/24 [00:04<00:04,  2.78it/s]
+BLEU: 0.7292, acc: 0.6683, no_result: 0.1058, loss: 1.2274 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7296, acc: 0.6607, no_result: 0.1071, loss: 1.2785 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.7174, acc: 0.6396, no_result: 0.1250, loss: 1.3358 ||:  62%|######2   | 15/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7159, acc: 0.6328, no_result: 0.1270, loss: 1.3578 ||:  67%|######6   | 16/24 [00:06<00:03,  2.37it/s]
+BLEU: 0.7129, acc: 0.6250, no_result: 0.1342, loss: 1.3461 ||:  71%|#######   | 17/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7102, acc: 0.6233, no_result: 0.1389, loss: 1.3558 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.41it/s]
+BLEU: 0.7122, acc: 0.6283, no_result: 0.1349, loss: 1.3371 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.51it/s]
+BLEU: 0.7149, acc: 0.6344, no_result: 0.1297, loss: 1.3242 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7130, acc: 0.6324, no_result: 0.1250, loss: 1.3306 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7188, acc: 0.6293, no_result: 0.1222, loss: 1.3235 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7221, acc: 0.6372, no_result: 0.1182, loss: 1.2997 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7221, acc: 0.6338, no_result: 0.1179, loss: 1.3173 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7221, acc: 0.6338, no_result: 0.1179, loss: 1.3173 ||: 100%|##########| 24/24 [00:08<00:00,  2.74it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0379 ||:   1%|          | 1/110 [00:00<01:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0313 ||:   2%|1         | 2/110 [00:01<01:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:   3%|2         | 3/110 [00:02<01:17,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:   4%|3         | 4/110 [00:02<01:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:   5%|4         | 5/110 [00:03<01:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:   7%|7         | 8/110 [00:05<01:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:   8%|8         | 9/110 [00:06<01:15,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:   9%|9         | 10/110 [00:07<01:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  10%|#         | 11/110 [00:07<01:13,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  11%|#         | 12/110 [00:08<01:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  12%|#1        | 13/110 [00:09<01:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  13%|#2        | 14/110 [00:09<01:08,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  14%|#3        | 15/110 [00:10<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  15%|#4        | 16/110 [00:11<01:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  15%|#5        | 17/110 [00:12<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  16%|#6        | 18/110 [00:12<01:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  17%|#7        | 19/110 [00:13<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  18%|#8        | 20/110 [00:15<01:32,  1.02s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  19%|#9        | 21/110 [00:15<01:21,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  20%|##        | 22/110 [00:16<01:12,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  21%|##        | 23/110 [00:17<01:11,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  22%|##1       | 24/110 [00:17<01:05,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  23%|##2       | 25/110 [00:18<01:02,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  24%|##3       | 26/110 [00:19<01:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  25%|##4       | 27/110 [00:20<01:02,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  25%|##5       | 28/110 [00:20<01:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  26%|##6       | 29/110 [00:21<00:59,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  27%|##7       | 30/110 [00:22<00:58,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  28%|##8       | 31/110 [00:23<00:56,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  29%|##9       | 32/110 [00:23<00:56,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  30%|###       | 33/110 [00:24<01:01,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  31%|###       | 34/110 [00:25<00:58,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  32%|###1      | 35/110 [00:26<00:54,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  33%|###2      | 36/110 [00:26<00:55,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  34%|###3      | 37/110 [00:27<00:57,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  35%|###4      | 38/110 [00:28<00:54,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0255 ||:  35%|###5      | 39/110 [00:29<00:53,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  36%|###6      | 40/110 [00:29<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  37%|###7      | 41/110 [00:30<00:51,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  38%|###8      | 42/110 [00:31<00:49,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  39%|###9      | 43/110 [00:32<00:48,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  40%|####      | 44/110 [00:32<00:47,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  41%|####      | 45/110 [00:33<00:46,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  42%|####1     | 46/110 [00:34<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  43%|####2     | 47/110 [00:34<00:44,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  44%|####3     | 48/110 [00:35<00:45,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  45%|####4     | 49/110 [00:36<00:44,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  45%|####5     | 50/110 [00:37<00:42,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  46%|####6     | 51/110 [00:37<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  47%|####7     | 52/110 [00:38<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  48%|####8     | 53/110 [00:39<00:41,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  49%|####9     | 54/110 [00:40<00:41,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  50%|#####     | 55/110 [00:40<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  51%|#####     | 56/110 [00:41<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  52%|#####1    | 57/110 [00:42<00:36,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  53%|#####2    | 58/110 [00:42<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  54%|#####3    | 59/110 [00:43<00:37,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  55%|#####4    | 60/110 [00:44<00:36,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  55%|#####5    | 61/110 [00:44<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  56%|#####6    | 62/110 [00:45<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0272 ||:  57%|#####7    | 63/110 [00:46<00:35,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  58%|#####8    | 64/110 [00:47<00:34,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  59%|#####9    | 65/110 [00:47<00:32,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  60%|######    | 66/110 [00:48<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  61%|######    | 67/110 [00:49<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0271 ||:  62%|######1   | 68/110 [00:49<00:28,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  63%|######2   | 69/110 [00:50<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  64%|######3   | 70/110 [00:51<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  65%|######4   | 71/110 [00:51<00:25,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  65%|######5   | 72/110 [00:52<00:25,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:  66%|######6   | 73/110 [00:53<00:25,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  67%|######7   | 74/110 [00:53<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  68%|######8   | 75/110 [00:54<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:  69%|######9   | 76/110 [00:55<00:22,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  70%|#######   | 77/110 [00:56<00:23,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  71%|#######   | 78/110 [00:56<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0274 ||:  72%|#######1  | 79/110 [00:57<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  73%|#######2  | 80/110 [00:58<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  74%|#######3  | 81/110 [00:58<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  75%|#######4  | 82/110 [00:59<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  75%|#######5  | 83/110 [01:00<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0274 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  79%|#######9  | 87/110 [01:03<00:16,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:  80%|########  | 88/110 [01:03<00:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  81%|########  | 89/110 [01:04<00:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0277 ||:  82%|########1 | 90/110 [01:05<00:14,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||:  83%|########2 | 91/110 [01:06<00:14,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:  84%|########3 | 92/110 [01:06<00:13,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:  85%|########4 | 93/110 [01:07<00:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:  85%|########5 | 94/110 [01:08<00:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:  86%|########6 | 95/110 [01:09<00:11,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  87%|########7 | 96/110 [01:09<00:10,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:  88%|########8 | 97/110 [01:10<00:09,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  89%|########9 | 98/110 [01:11<00:09,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  90%|######### | 99/110 [01:12<00:08,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0282 ||:  91%|######### | 100/110 [01:12<00:07,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  92%|#########1| 101/110 [01:13<00:06,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0283 ||:  93%|#########2| 102/110 [01:14<00:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  94%|#########3| 103/110 [01:15<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0284 ||:  95%|#########4| 104/110 [01:15<00:04,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  95%|#########5| 105/110 [01:16<00:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  96%|#########6| 106/110 [01:17<00:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0285 ||:  97%|#########7| 107/110 [01:17<00:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0286 ||:  98%|#########8| 108/110 [01:18<00:01,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||:  99%|#########9| 109/110 [01:19<00:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||: 100%|##########| 110/110 [01:19<00:00,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0287 ||: 100%|##########| 110/110 [01:19<00:00,  1.38it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6562, no_result: 0.0625, loss: 1.2337 ||:   4%|4         | 1/24 [00:00<00:09,  2.34it/s]
+BLEU: 0.6867, acc: 0.7031, no_result: 0.0781, loss: 1.1343 ||:   8%|8         | 2/24 [00:00<00:09,  2.43it/s]
+BLEU: 0.6710, acc: 0.6667, no_result: 0.1146, loss: 1.2737 ||:  12%|#2        | 3/24 [00:01<00:08,  2.43it/s]
+BLEU: 0.7004, acc: 0.6719, no_result: 0.1172, loss: 1.1844 ||:  17%|#6        | 4/24 [00:01<00:08,  2.39it/s]
+BLEU: 0.7258, acc: 0.6937, no_result: 0.1187, loss: 1.1522 ||:  21%|##        | 5/24 [00:02<00:08,  2.36it/s]
+BLEU: 0.7215, acc: 0.6719, no_result: 0.1146, loss: 1.1464 ||:  25%|##5       | 6/24 [00:02<00:07,  2.43it/s]
+BLEU: 0.7341, acc: 0.6830, no_result: 0.1071, loss: 1.1267 ||:  29%|##9       | 7/24 [00:02<00:06,  2.54it/s]
+BLEU: 0.7306, acc: 0.6836, no_result: 0.1055, loss: 1.1479 ||:  33%|###3      | 8/24 [00:03<00:06,  2.56it/s]
+BLEU: 0.7213, acc: 0.6944, no_result: 0.1076, loss: 1.1410 ||:  38%|###7      | 9/24 [00:03<00:06,  2.49it/s]
+BLEU: 0.7307, acc: 0.6750, no_result: 0.1062, loss: 1.1603 ||:  42%|####1     | 10/24 [00:04<00:05,  2.41it/s]
+BLEU: 0.7291, acc: 0.6562, no_result: 0.1108, loss: 1.2107 ||:  46%|####5     | 11/24 [00:04<00:05,  2.30it/s]
+BLEU: 0.7252, acc: 0.6589, no_result: 0.1068, loss: 1.2252 ||:  50%|#####     | 12/24 [00:04<00:05,  2.37it/s]
+BLEU: 0.7245, acc: 0.6562, no_result: 0.1130, loss: 1.2442 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.32it/s]
+BLEU: 0.7254, acc: 0.6429, no_result: 0.1138, loss: 1.2978 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.23it/s]
+BLEU: 0.7135, acc: 0.6250, no_result: 0.1333, loss: 1.3497 ||:  62%|######2   | 15/24 [00:06<00:04,  2.08it/s]
+BLEU: 0.7107, acc: 0.6250, no_result: 0.1328, loss: 1.3748 ||:  67%|######6   | 16/24 [00:06<00:03,  2.05it/s]
+BLEU: 0.7083, acc: 0.6158, no_result: 0.1415, loss: 1.3643 ||:  71%|#######   | 17/24 [00:07<00:03,  2.10it/s]
+BLEU: 0.7053, acc: 0.6146, no_result: 0.1458, loss: 1.3739 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.07it/s]
+BLEU: 0.7071, acc: 0.6217, no_result: 0.1414, loss: 1.3527 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.18it/s]
+BLEU: 0.7100, acc: 0.6266, no_result: 0.1375, loss: 1.3416 ||:  83%|########3 | 20/24 [00:08<00:01,  2.30it/s]
+BLEU: 0.7080, acc: 0.6250, no_result: 0.1324, loss: 1.3469 ||:  88%|########7 | 21/24 [00:09<00:01,  2.31it/s]
+BLEU: 0.7127, acc: 0.6207, no_result: 0.1293, loss: 1.3397 ||:  92%|#########1| 22/24 [00:09<00:00,  2.28it/s]
+BLEU: 0.7158, acc: 0.6291, no_result: 0.1236, loss: 1.3158 ||:  96%|#########5| 23/24 [00:09<00:00,  2.36it/s]
+BLEU: 0.7161, acc: 0.6306, no_result: 0.1231, loss: 1.3356 ||: 100%|##########| 24/24 [00:10<00:00,  2.90it/s]
+BLEU: 0.7161, acc: 0.6306, no_result: 0.1231, loss: 1.3356 ||: 100%|##########| 24/24 [00:10<00:00,  2.37it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:   1%|          | 1/110 [00:00<01:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:   2%|1         | 2/110 [00:01<01:07,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:   3%|2         | 3/110 [00:01<01:04,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:   4%|3         | 4/110 [00:02<01:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:   5%|4         | 5/110 [00:03<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:   6%|6         | 7/110 [00:04<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:   7%|7         | 8/110 [00:05<01:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:   8%|8         | 9/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:   9%|9         | 10/110 [00:07<01:30,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  10%|#         | 11/110 [00:08<01:25,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  11%|#         | 12/110 [00:08<01:18,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  12%|#1        | 13/110 [00:09<01:17,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  13%|#2        | 14/110 [00:10<01:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  14%|#3        | 15/110 [00:10<01:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  15%|#4        | 16/110 [00:11<01:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  15%|#5        | 17/110 [00:12<01:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  16%|#6        | 18/110 [00:12<01:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  17%|#7        | 19/110 [00:13<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  18%|#8        | 20/110 [00:14<00:57,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  19%|#9        | 21/110 [00:14<00:53,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  20%|##        | 22/110 [00:15<00:55,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  21%|##        | 23/110 [00:15<00:54,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  22%|##1       | 24/110 [00:16<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  23%|##2       | 25/110 [00:17<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  24%|##3       | 26/110 [00:18<00:56,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  25%|##4       | 27/110 [00:18<00:53,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  25%|##5       | 28/110 [00:19<00:51,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  26%|##6       | 29/110 [00:19<00:52,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  27%|##7       | 30/110 [00:20<00:54,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  28%|##8       | 31/110 [00:21<00:54,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  29%|##9       | 32/110 [00:22<00:52,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  30%|###       | 33/110 [00:22<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  31%|###       | 34/110 [00:23<00:52,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  32%|###1      | 35/110 [00:24<00:52,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  33%|###2      | 36/110 [00:24<00:53,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  34%|###3      | 37/110 [00:25<00:50,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  35%|###4      | 38/110 [00:26<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  35%|###5      | 39/110 [00:26<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  36%|###6      | 40/110 [00:27<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  37%|###7      | 41/110 [00:28<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  39%|###9      | 43/110 [00:29<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  40%|####      | 44/110 [00:30<00:44,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  41%|####      | 45/110 [00:30<00:43,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  42%|####1     | 46/110 [00:31<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  43%|####2     | 47/110 [00:32<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  44%|####3     | 48/110 [00:33<00:41,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  45%|####4     | 49/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  45%|####5     | 50/110 [00:34<00:39,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  46%|####6     | 51/110 [00:34<00:38,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  47%|####7     | 52/110 [00:35<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  48%|####8     | 53/110 [00:36<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  49%|####9     | 54/110 [00:37<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  50%|#####     | 55/110 [00:37<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  51%|#####     | 56/110 [00:38<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  52%|#####1    | 57/110 [00:39<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  53%|#####2    | 58/110 [00:40<00:37,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0255 ||:  54%|#####3    | 59/110 [00:40<00:36,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  55%|#####4    | 60/110 [00:41<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  55%|#####5    | 61/110 [00:42<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  57%|#####7    | 63/110 [00:43<00:30,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  58%|#####8    | 64/110 [00:43<00:28,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  60%|######    | 66/110 [00:45<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  61%|######    | 67/110 [00:45<00:27,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  62%|######1   | 68/110 [00:46<00:26,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  63%|######2   | 69/110 [00:47<00:26,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  64%|######3   | 70/110 [00:47<00:25,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  65%|######4   | 71/110 [00:48<00:23,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  65%|######5   | 72/110 [00:48<00:22,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  66%|######6   | 73/110 [00:49<00:22,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  67%|######7   | 74/110 [00:50<00:22,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  68%|######8   | 75/110 [00:50<00:22,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  69%|######9   | 76/110 [00:51<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  70%|#######   | 77/110 [00:52<00:20,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  71%|#######   | 78/110 [00:52<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  72%|#######1  | 79/110 [00:53<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  77%|#######7  | 85/110 [00:57<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  80%|########  | 88/110 [00:59<00:14,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  81%|########  | 89/110 [01:00<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  82%|########1 | 90/110 [01:00<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  83%|########2 | 91/110 [01:01<00:12,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  84%|########3 | 92/110 [01:02<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  85%|########4 | 93/110 [01:02<00:10,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  85%|########5 | 94/110 [01:03<00:10,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  86%|########6 | 95/110 [01:04<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  87%|########7 | 96/110 [01:05<00:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  88%|########8 | 97/110 [01:05<00:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  89%|########9 | 98/110 [01:06<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  90%|######### | 99/110 [01:06<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  91%|######### | 100/110 [01:07<00:06,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  92%|#########1| 101/110 [01:08<00:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  93%|#########2| 102/110 [01:08<00:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  94%|#########3| 103/110 [01:09<00:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  95%|#########4| 104/110 [01:10<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0269 ||:  95%|#########5| 105/110 [01:10<00:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0270 ||:  96%|#########6| 106/110 [01:11<00:03,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0272 ||:  97%|#########7| 107/110 [01:12<00:02,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:  98%|#########8| 108/110 [01:13<00:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0272 ||:  99%|#########9| 109/110 [01:14<00:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||: 100%|##########| 110/110 [01:15<00:00,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0278 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6875, no_result: 0.0625, loss: 1.2282 ||:   4%|4         | 1/24 [00:00<00:09,  2.43it/s]
+BLEU: 0.6928, acc: 0.7031, no_result: 0.0938, loss: 1.1632 ||:   8%|8         | 2/24 [00:00<00:08,  2.68it/s]
+BLEU: 0.6838, acc: 0.6771, no_result: 0.1250, loss: 1.2850 ||:  12%|#2        | 3/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.7099, acc: 0.6797, no_result: 0.1250, loss: 1.1942 ||:  17%|#6        | 4/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7375, acc: 0.7063, no_result: 0.1187, loss: 1.1360 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7373, acc: 0.6875, no_result: 0.1198, loss: 1.1294 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7422, acc: 0.6920, no_result: 0.1116, loss: 1.1122 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7368, acc: 0.6953, no_result: 0.1133, loss: 1.1356 ||:  33%|###3      | 8/24 [00:02<00:05,  2.96it/s]
+BLEU: 0.7267, acc: 0.7014, no_result: 0.1111, loss: 1.1300 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7372, acc: 0.6875, no_result: 0.1031, loss: 1.1455 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7355, acc: 0.6733, no_result: 0.1051, loss: 1.1956 ||:  46%|####5     | 11/24 [00:03<00:04,  2.72it/s]
+BLEU: 0.7306, acc: 0.6823, no_result: 0.0990, loss: 1.2094 ||:  50%|#####     | 12/24 [00:04<00:04,  2.81it/s]
+BLEU: 0.7299, acc: 0.6827, no_result: 0.1034, loss: 1.2348 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.72it/s]
+BLEU: 0.7308, acc: 0.6696, no_result: 0.1004, loss: 1.2802 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.7213, acc: 0.6521, no_result: 0.1187, loss: 1.3402 ||:  62%|######2   | 15/24 [00:05<00:03,  2.42it/s]
+BLEU: 0.7194, acc: 0.6465, no_result: 0.1211, loss: 1.3646 ||:  67%|######6   | 16/24 [00:06<00:03,  2.39it/s]
+BLEU: 0.7186, acc: 0.6415, no_result: 0.1268, loss: 1.3546 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.7156, acc: 0.6372, no_result: 0.1337, loss: 1.3658 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7171, acc: 0.6414, no_result: 0.1299, loss: 1.3454 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7186, acc: 0.6453, no_result: 0.1250, loss: 1.3342 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.7145, acc: 0.6399, no_result: 0.1205, loss: 1.3401 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7196, acc: 0.6378, no_result: 0.1179, loss: 1.3339 ||:  92%|#########1| 22/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.7235, acc: 0.6440, no_result: 0.1141, loss: 1.3102 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.7235, acc: 0.6403, no_result: 0.1140, loss: 1.3302 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.7235, acc: 0.6403, no_result: 0.1140, loss: 1.3302 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0313 ||:   1%|          | 1/110 [00:00<01:10,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0237 ||:   2%|1         | 2/110 [00:01<01:29,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0312 ||:   3%|2         | 3/110 [00:02<01:19,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0281 ||:   4%|3         | 4/110 [00:02<01:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:   5%|4         | 5/110 [00:03<01:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:   5%|5         | 6/110 [00:04<01:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:   7%|7         | 8/110 [00:05<01:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:   8%|8         | 9/110 [00:06<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:   9%|9         | 10/110 [00:06<01:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  10%|#         | 11/110 [00:07<01:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  11%|#         | 12/110 [00:08<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:  12%|#1        | 13/110 [00:08<01:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  13%|#2        | 14/110 [00:09<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  15%|#4        | 16/110 [00:11<01:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  15%|#5        | 17/110 [00:11<01:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  17%|#7        | 19/110 [00:13<00:59,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:  18%|#8        | 20/110 [00:13<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  19%|#9        | 21/110 [00:14<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  20%|##        | 22/110 [00:15<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  21%|##        | 23/110 [00:15<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  22%|##1       | 24/110 [00:16<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  23%|##2       | 25/110 [00:17<01:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  24%|##3       | 26/110 [00:17<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0235 ||:  25%|##4       | 27/110 [00:18<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  25%|##5       | 28/110 [00:19<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  26%|##6       | 29/110 [00:20<00:59,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  27%|##7       | 30/110 [00:20<00:57,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  28%|##8       | 31/110 [00:21<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  29%|##9       | 32/110 [00:22<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  30%|###       | 33/110 [00:22<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  31%|###       | 34/110 [00:23<00:52,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  32%|###1      | 35/110 [00:24<00:49,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  33%|###2      | 36/110 [00:24<00:48,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  34%|###3      | 37/110 [00:25<00:48,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  35%|###4      | 38/110 [00:26<00:46,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  35%|###5      | 39/110 [00:26<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  36%|###6      | 40/110 [00:27<00:46,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  37%|###7      | 41/110 [00:28<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  38%|###8      | 42/110 [00:28<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  39%|###9      | 43/110 [00:29<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  40%|####      | 44/110 [00:30<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  41%|####      | 45/110 [00:30<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  42%|####1     | 46/110 [00:31<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  43%|####2     | 47/110 [00:32<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  44%|####3     | 48/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  45%|####4     | 49/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  45%|####5     | 50/110 [00:34<00:39,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  46%|####6     | 51/110 [00:34<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  47%|####7     | 52/110 [00:35<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  48%|####8     | 53/110 [00:36<00:38,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  49%|####9     | 54/110 [00:36<00:38,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  50%|#####     | 55/110 [00:37<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  51%|#####     | 56/110 [00:38<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  52%|#####1    | 57/110 [00:38<00:33,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  54%|#####3    | 59/110 [00:40<00:35,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  55%|#####4    | 60/110 [00:40<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  55%|#####5    | 61/110 [00:41<00:34,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  56%|#####6    | 62/110 [00:42<00:32,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  60%|######    | 66/110 [00:44<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  61%|######    | 67/110 [00:45<00:27,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  62%|######1   | 68/110 [00:46<00:26,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  63%|######2   | 69/110 [00:46<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  64%|######3   | 70/110 [00:47<00:27,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  65%|######4   | 71/110 [00:48<00:25,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  65%|######5   | 72/110 [00:49<00:26,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:  66%|######6   | 73/110 [00:49<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  67%|######7   | 74/110 [00:50<00:23,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  68%|######8   | 75/110 [00:50<00:23,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  69%|######9   | 76/110 [00:51<00:22,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  70%|#######   | 77/110 [00:52<00:21,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  71%|#######   | 78/110 [00:52<00:21,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  80%|########  | 88/110 [00:59<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  81%|########  | 89/110 [01:00<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  82%|########1 | 90/110 [01:00<00:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  83%|########2 | 91/110 [01:01<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  84%|########3 | 92/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  85%|########4 | 93/110 [01:02<00:10,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  85%|########5 | 94/110 [01:03<00:10,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  86%|########6 | 95/110 [01:04<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  87%|########7 | 96/110 [01:04<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  88%|########8 | 97/110 [01:05<00:07,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  89%|########9 | 98/110 [01:05<00:07,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  90%|######### | 99/110 [01:06<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  91%|######### | 100/110 [01:08<00:09,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  92%|#########1| 101/110 [01:08<00:07,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  93%|#########2| 102/110 [01:09<00:06,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  94%|#########3| 103/110 [01:09<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  95%|#########4| 104/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  95%|#########5| 105/110 [01:11<00:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  96%|#########6| 106/110 [01:11<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  97%|#########7| 107/110 [01:12<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  98%|#########8| 108/110 [01:13<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  99%|#########9| 109/110 [01:14<00:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||: 100%|##########| 110/110 [01:14<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7266, acc: 0.7188, no_result: 0.0312, loss: 1.2383 ||:   4%|4         | 1/24 [00:00<00:08,  2.74it/s]
+BLEU: 0.6909, acc: 0.7344, no_result: 0.0625, loss: 1.1392 ||:   8%|8         | 2/24 [00:00<00:07,  2.82it/s]
+BLEU: 0.6826, acc: 0.6875, no_result: 0.1042, loss: 1.2696 ||:  12%|#2        | 3/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.7076, acc: 0.6719, no_result: 0.1094, loss: 1.1840 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7348, acc: 0.6937, no_result: 0.1125, loss: 1.1297 ||:  21%|##        | 5/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7380, acc: 0.6771, no_result: 0.1042, loss: 1.1288 ||:  25%|##5       | 6/24 [00:02<00:06,  2.81it/s]
+BLEU: 0.7465, acc: 0.6875, no_result: 0.0982, loss: 1.1114 ||:  29%|##9       | 7/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7415, acc: 0.6953, no_result: 0.0977, loss: 1.1402 ||:  33%|###3      | 8/24 [00:02<00:05,  2.98it/s]
+BLEU: 0.7312, acc: 0.6979, no_result: 0.1007, loss: 1.1366 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7396, acc: 0.6906, no_result: 0.0906, loss: 1.1518 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.7380, acc: 0.6818, no_result: 0.0938, loss: 1.1995 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7325, acc: 0.6849, no_result: 0.0911, loss: 1.2146 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7312, acc: 0.6803, no_result: 0.0986, loss: 1.2364 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.7310, acc: 0.6696, no_result: 0.0982, loss: 1.2839 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.7187, acc: 0.6500, no_result: 0.1187, loss: 1.3427 ||:  62%|######2   | 15/24 [00:05<00:03,  2.44it/s]
+BLEU: 0.7147, acc: 0.6426, no_result: 0.1211, loss: 1.3661 ||:  67%|######6   | 16/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7124, acc: 0.6397, no_result: 0.1250, loss: 1.3556 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.7097, acc: 0.6389, no_result: 0.1285, loss: 1.3640 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7114, acc: 0.6431, no_result: 0.1250, loss: 1.3449 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7144, acc: 0.6484, no_result: 0.1203, loss: 1.3322 ||:  83%|########3 | 20/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.7105, acc: 0.6429, no_result: 0.1161, loss: 1.3367 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7150, acc: 0.6406, no_result: 0.1136, loss: 1.3314 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7175, acc: 0.6481, no_result: 0.1087, loss: 1.3094 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7172, acc: 0.6442, no_result: 0.1088, loss: 1.3317 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.7172, acc: 0.6442, no_result: 0.1088, loss: 1.3317 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:   1%|          | 1/110 [00:00<01:23,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0331 ||:   2%|1         | 2/110 [00:01<01:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0251 ||:   3%|2         | 3/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:   4%|3         | 4/110 [00:02<01:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:   5%|4         | 5/110 [00:03<01:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:   5%|5         | 6/110 [00:03<01:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:   6%|6         | 7/110 [00:04<01:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:   7%|7         | 8/110 [00:05<01:04,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0237 ||:   8%|8         | 9/110 [00:05<01:04,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:   9%|9         | 10/110 [00:06<01:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  10%|#         | 11/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  11%|#         | 12/110 [00:07<01:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  12%|#1        | 13/110 [00:08<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  13%|#2        | 14/110 [00:09<01:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  14%|#3        | 15/110 [00:09<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  15%|#4        | 16/110 [00:10<01:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  15%|#5        | 17/110 [00:11<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  16%|#6        | 18/110 [00:11<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  17%|#7        | 19/110 [00:12<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0254 ||:  18%|#8        | 20/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  19%|#9        | 21/110 [00:13<00:55,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  20%|##        | 22/110 [00:14<00:55,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0267 ||:  21%|##        | 23/110 [00:15<00:54,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  22%|##1       | 24/110 [00:15<00:53,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  23%|##2       | 25/110 [00:16<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  24%|##3       | 26/110 [00:16<00:53,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  25%|##4       | 27/110 [00:17<00:51,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  25%|##5       | 28/110 [00:18<00:52,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:  26%|##6       | 29/110 [00:18<00:51,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  27%|##7       | 30/110 [00:19<00:51,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  28%|##8       | 31/110 [00:20<00:50,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  29%|##9       | 32/110 [00:20<00:51,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  30%|###       | 33/110 [00:21<00:51,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  31%|###       | 34/110 [00:22<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  32%|###1      | 35/110 [00:22<00:50,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  33%|###2      | 36/110 [00:23<00:48,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  34%|###3      | 37/110 [00:24<00:47,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  35%|###4      | 38/110 [00:24<00:45,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  35%|###5      | 39/110 [00:25<00:45,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  36%|###6      | 40/110 [00:25<00:44,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  37%|###7      | 41/110 [00:26<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  38%|###8      | 42/110 [00:27<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  39%|###9      | 43/110 [00:28<00:44,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  40%|####      | 44/110 [00:28<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  41%|####      | 45/110 [00:29<00:42,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  42%|####1     | 46/110 [00:30<00:45,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0249 ||:  43%|####2     | 47/110 [00:30<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  44%|####3     | 48/110 [00:31<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:  45%|####4     | 49/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0252 ||:  45%|####5     | 50/110 [00:32<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0255 ||:  46%|####6     | 51/110 [00:33<00:38,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0258 ||:  47%|####7     | 52/110 [00:33<00:37,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0255 ||:  48%|####8     | 53/110 [00:34<00:35,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0253 ||:  49%|####9     | 54/110 [00:35<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  50%|#####     | 55/110 [00:35<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  51%|#####     | 56/110 [00:36<00:34,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  52%|#####1    | 57/110 [00:37<00:32,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  53%|#####2    | 58/110 [00:37<00:32,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  54%|#####3    | 59/110 [00:38<00:32,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0268 ||:  55%|#####4    | 60/110 [00:39<00:32,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  55%|#####5    | 61/110 [00:39<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  56%|#####6    | 62/110 [00:40<00:29,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  57%|#####7    | 63/110 [00:40<00:28,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  58%|#####8    | 64/110 [00:41<00:28,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  59%|#####9    | 65/110 [00:42<00:27,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  60%|######    | 66/110 [00:42<00:27,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  61%|######    | 67/110 [00:43<00:27,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  62%|######1   | 68/110 [00:44<00:28,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  63%|######2   | 69/110 [00:44<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  64%|######3   | 70/110 [00:45<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  65%|######4   | 71/110 [00:46<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  65%|######5   | 72/110 [00:47<00:26,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  66%|######6   | 73/110 [00:47<00:25,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  67%|######7   | 74/110 [00:48<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  68%|######8   | 75/110 [00:49<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  69%|######9   | 76/110 [00:49<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  70%|#######   | 77/110 [00:50<00:21,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  71%|#######   | 78/110 [00:50<00:20,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:  72%|#######1  | 79/110 [00:51<00:19,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  73%|#######2  | 80/110 [00:52<00:19,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0257 ||:  74%|#######3  | 81/110 [00:52<00:18,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  75%|#######4  | 82/110 [00:53<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:  75%|#######5  | 83/110 [00:54<00:18,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  76%|#######6  | 84/110 [00:55<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  77%|#######7  | 85/110 [00:56<00:19,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  78%|#######8  | 86/110 [00:56<00:18,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||:  79%|#######9  | 87/110 [00:57<00:16,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  80%|########  | 88/110 [00:58<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  81%|########  | 89/110 [00:58<00:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  82%|########1 | 90/110 [01:00<00:18,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  83%|########2 | 91/110 [01:00<00:16,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  84%|########3 | 92/110 [01:01<00:14,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  85%|########4 | 93/110 [01:02<00:12,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  85%|########5 | 94/110 [01:02<00:11,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  86%|########6 | 95/110 [01:03<00:11,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  87%|########7 | 96/110 [01:04<00:10,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  88%|########8 | 97/110 [01:05<00:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  89%|########9 | 98/110 [01:05<00:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  90%|######### | 99/110 [01:06<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  91%|######### | 100/110 [01:07<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  92%|#########1| 101/110 [01:07<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  93%|#########2| 102/110 [01:08<00:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0266 ||:  94%|#########3| 103/110 [01:09<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:  95%|#########4| 104/110 [01:09<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  95%|#########5| 105/110 [01:10<00:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:  96%|#########6| 106/110 [01:11<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  97%|#########7| 107/110 [01:12<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0263 ||:  98%|#########8| 108/110 [01:12<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:  99%|#########9| 109/110 [01:13<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||: 100%|##########| 110/110 [01:13<00:00,  1.70it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0260 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.7500, no_result: 0.0625, loss: 1.2269 ||:   4%|4         | 1/24 [00:00<00:08,  2.62it/s]
+BLEU: 0.6886, acc: 0.7500, no_result: 0.0781, loss: 1.1488 ||:   8%|8         | 2/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.6742, acc: 0.6979, no_result: 0.1250, loss: 1.2810 ||:  12%|#2        | 3/24 [00:01<00:07,  2.64it/s]
+BLEU: 0.7048, acc: 0.6875, no_result: 0.1328, loss: 1.1904 ||:  17%|#6        | 4/24 [00:01<00:07,  2.62it/s]
+BLEU: 0.7331, acc: 0.7125, no_result: 0.1250, loss: 1.1387 ||:  21%|##        | 5/24 [00:01<00:07,  2.63it/s]
+BLEU: 0.7321, acc: 0.7083, no_result: 0.1146, loss: 1.1360 ||:  25%|##5       | 6/24 [00:02<00:06,  2.75it/s]
+BLEU: 0.7377, acc: 0.7143, no_result: 0.1071, loss: 1.1216 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7337, acc: 0.7148, no_result: 0.1055, loss: 1.1442 ||:  33%|###3      | 8/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7219, acc: 0.7118, no_result: 0.1111, loss: 1.1424 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7329, acc: 0.6969, no_result: 0.1031, loss: 1.1615 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7327, acc: 0.6847, no_result: 0.1080, loss: 1.2111 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7281, acc: 0.6875, no_result: 0.1042, loss: 1.2274 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7248, acc: 0.6827, no_result: 0.1058, loss: 1.2495 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.7251, acc: 0.6696, no_result: 0.1049, loss: 1.2947 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.64it/s]
+BLEU: 0.7160, acc: 0.6500, no_result: 0.1250, loss: 1.3550 ||:  62%|######2   | 15/24 [00:05<00:03,  2.45it/s]
+BLEU: 0.7118, acc: 0.6426, no_result: 0.1270, loss: 1.3780 ||:  67%|######6   | 16/24 [00:06<00:03,  2.41it/s]
+BLEU: 0.7100, acc: 0.6360, no_result: 0.1305, loss: 1.3706 ||:  71%|#######   | 17/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.7054, acc: 0.6319, no_result: 0.1337, loss: 1.3812 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.7076, acc: 0.6382, no_result: 0.1299, loss: 1.3593 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.56it/s]
+BLEU: 0.7102, acc: 0.6422, no_result: 0.1266, loss: 1.3464 ||:  83%|########3 | 20/24 [00:07<00:01,  2.70it/s]
+BLEU: 0.7064, acc: 0.6399, no_result: 0.1235, loss: 1.3514 ||:  88%|########7 | 21/24 [00:07<00:01,  2.69it/s]
+BLEU: 0.7102, acc: 0.6392, no_result: 0.1207, loss: 1.3442 ||:  92%|#########1| 22/24 [00:08<00:00,  2.67it/s]
+BLEU: 0.7128, acc: 0.6454, no_result: 0.1182, loss: 1.3222 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.7123, acc: 0.6416, no_result: 0.1179, loss: 1.3444 ||: 100%|##########| 24/24 [00:08<00:00,  3.40it/s]
+BLEU: 0.7123, acc: 0.6416, no_result: 0.1179, loss: 1.3444 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:   1%|          | 1/110 [00:00<01:09,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:   2%|1         | 2/110 [00:01<01:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:   3%|2         | 3/110 [00:02<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:   5%|4         | 5/110 [00:03<01:13,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:   7%|7         | 8/110 [00:05<01:15,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:   8%|8         | 9/110 [00:06<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:   9%|9         | 10/110 [00:07<01:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  10%|#         | 11/110 [00:07<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  11%|#         | 12/110 [00:08<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  12%|#1        | 13/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  13%|#2        | 14/110 [00:09<01:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  14%|#3        | 15/110 [00:10<01:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  15%|#4        | 16/110 [00:11<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  15%|#5        | 17/110 [00:11<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  16%|#6        | 18/110 [00:12<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  18%|#8        | 20/110 [00:13<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  19%|#9        | 21/110 [00:14<01:01,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  20%|##        | 22/110 [00:15<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  21%|##        | 23/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  22%|##1       | 24/110 [00:16<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  23%|##2       | 25/110 [00:17<00:57,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  24%|##3       | 26/110 [00:17<00:56,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0232 ||:  25%|##4       | 27/110 [00:18<00:57,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  25%|##5       | 28/110 [00:19<00:55,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  26%|##6       | 29/110 [00:19<00:57,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  27%|##7       | 30/110 [00:20<00:56,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0232 ||:  28%|##8       | 31/110 [00:21<00:54,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:  29%|##9       | 32/110 [00:22<00:54,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  30%|###       | 33/110 [00:22<00:52,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  31%|###       | 34/110 [00:23<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0232 ||:  32%|###1      | 35/110 [00:23<00:49,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  33%|###2      | 36/110 [00:24<00:54,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  34%|###3      | 37/110 [00:25<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  35%|###4      | 38/110 [00:26<00:51,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  35%|###5      | 39/110 [00:26<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0237 ||:  36%|###6      | 40/110 [00:27<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  37%|###7      | 41/110 [00:28<00:46,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:  39%|###9      | 43/110 [00:29<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  40%|####      | 44/110 [00:30<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  41%|####      | 45/110 [00:30<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  42%|####1     | 46/110 [00:31<00:45,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  43%|####2     | 47/110 [00:32<00:42,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  44%|####3     | 48/110 [00:33<00:42,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  45%|####4     | 49/110 [00:33<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  45%|####5     | 50/110 [00:34<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  46%|####6     | 51/110 [00:35<00:42,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  47%|####7     | 52/110 [00:35<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  48%|####8     | 53/110 [00:36<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0247 ||:  49%|####9     | 54/110 [00:37<00:37,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  50%|#####     | 55/110 [00:37<00:35,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  51%|#####     | 56/110 [00:38<00:35,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  52%|#####1    | 57/110 [00:39<00:34,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  53%|#####2    | 58/110 [00:39<00:32,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  54%|#####3    | 59/110 [00:40<00:31,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  55%|#####4    | 60/110 [00:40<00:31,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  55%|#####5    | 61/110 [00:41<00:30,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  56%|#####6    | 62/110 [00:42<00:29,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  57%|#####7    | 63/110 [00:42<00:29,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  58%|#####8    | 64/110 [00:43<00:28,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  59%|#####9    | 65/110 [00:43<00:27,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  60%|######    | 66/110 [00:44<00:28,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  61%|######    | 67/110 [00:45<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  63%|######2   | 69/110 [00:46<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  64%|######3   | 70/110 [00:47<00:26,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  65%|######4   | 71/110 [00:48<00:25,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  65%|######5   | 72/110 [00:48<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  66%|######6   | 73/110 [00:49<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  67%|######7   | 74/110 [00:50<00:23,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  68%|######8   | 75/110 [00:50<00:22,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  69%|######9   | 76/110 [00:51<00:21,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  70%|#######   | 77/110 [00:52<00:22,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  71%|#######   | 78/110 [00:52<00:20,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  72%|#######1  | 79/110 [00:53<00:19,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  73%|#######2  | 80/110 [00:54<00:29,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:  74%|#######3  | 81/110 [00:55<00:24,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0246 ||:  75%|#######4  | 82/110 [00:56<00:21,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  75%|#######5  | 83/110 [00:56<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  77%|#######7  | 85/110 [00:58<00:17,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  80%|########  | 88/110 [01:00<00:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  81%|########  | 89/110 [01:00<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0238 ||:  82%|########1 | 90/110 [01:01<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  83%|########2 | 91/110 [01:02<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  84%|########3 | 92/110 [01:02<00:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  85%|########4 | 93/110 [01:03<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  85%|########5 | 94/110 [01:04<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  86%|########6 | 95/110 [01:04<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  87%|########7 | 96/110 [01:05<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  89%|########9 | 98/110 [01:06<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:  90%|######### | 99/110 [01:07<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  91%|######### | 100/110 [01:08<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:  92%|#########1| 101/110 [01:08<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  93%|#########2| 102/110 [01:09<00:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  94%|#########3| 103/110 [01:10<00:04,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  95%|#########4| 104/110 [01:10<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  95%|#########5| 105/110 [01:11<00:03,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  96%|#########6| 106/110 [01:11<00:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0244 ||:  97%|#########7| 107/110 [01:12<00:01,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  98%|#########8| 108/110 [01:13<00:01,  1.66it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:  99%|#########9| 109/110 [01:13<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||: 100%|##########| 110/110 [01:13<00:00,  1.96it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||: 100%|##########| 110/110 [01:13<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.0938, loss: 1.2726 ||:   4%|4         | 1/24 [00:00<00:08,  2.67it/s]
+BLEU: 0.7143, acc: 0.6875, no_result: 0.0781, loss: 1.1660 ||:   8%|8         | 2/24 [00:00<00:07,  2.81it/s]
+BLEU: 0.6877, acc: 0.6562, no_result: 0.1250, loss: 1.2988 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.7124, acc: 0.6562, no_result: 0.1328, loss: 1.2063 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7380, acc: 0.6813, no_result: 0.1313, loss: 1.1533 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7350, acc: 0.6719, no_result: 0.1198, loss: 1.1483 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7437, acc: 0.6830, no_result: 0.1116, loss: 1.1271 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7415, acc: 0.6914, no_result: 0.1094, loss: 1.1544 ||:  33%|###3      | 8/24 [00:02<00:05,  2.96it/s]
+BLEU: 0.7318, acc: 0.6944, no_result: 0.1146, loss: 1.1495 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7403, acc: 0.6781, no_result: 0.1125, loss: 1.1691 ||:  42%|####1     | 10/24 [00:03<00:04,  2.82it/s]
+BLEU: 0.7388, acc: 0.6648, no_result: 0.1165, loss: 1.2186 ||:  46%|####5     | 11/24 [00:03<00:04,  2.69it/s]
+BLEU: 0.7350, acc: 0.6719, no_result: 0.1120, loss: 1.2351 ||:  50%|#####     | 12/24 [00:04<00:04,  2.78it/s]
+BLEU: 0.7339, acc: 0.6707, no_result: 0.1154, loss: 1.2555 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7340, acc: 0.6607, no_result: 0.1138, loss: 1.3051 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7259, acc: 0.6396, no_result: 0.1354, loss: 1.3654 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7224, acc: 0.6348, no_result: 0.1367, loss: 1.3868 ||:  67%|######6   | 16/24 [00:06<00:03,  2.38it/s]
+BLEU: 0.7197, acc: 0.6268, no_result: 0.1452, loss: 1.3768 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7167, acc: 0.6233, no_result: 0.1476, loss: 1.3867 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7184, acc: 0.6283, no_result: 0.1431, loss: 1.3658 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7203, acc: 0.6328, no_result: 0.1391, loss: 1.3533 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7169, acc: 0.6310, no_result: 0.1339, loss: 1.3568 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7217, acc: 0.6293, no_result: 0.1321, loss: 1.3524 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7242, acc: 0.6372, no_result: 0.1264, loss: 1.3296 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7244, acc: 0.6385, no_result: 0.1211, loss: 1.3536 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7244, acc: 0.6385, no_result: 0.1211, loss: 1.3536 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0668 ||:   1%|          | 1/110 [00:00<01:25,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0378 ||:   2%|1         | 2/110 [00:01<01:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0296 ||:   3%|2         | 3/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0262 ||:   4%|3         | 4/110 [00:02<01:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:   5%|4         | 5/110 [00:03<01:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0264 ||:   5%|5         | 6/110 [00:03<01:07,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:   6%|6         | 7/110 [00:04<01:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0238 ||:   7%|7         | 8/110 [00:05<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0237 ||:   8%|8         | 9/110 [00:06<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:   9%|9         | 10/110 [00:06<01:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  10%|#         | 11/110 [00:07<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  11%|#         | 12/110 [00:08<01:13,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  12%|#1        | 13/110 [00:09<01:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  14%|#3        | 15/110 [00:10<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  15%|#4        | 16/110 [00:10<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  15%|#5        | 17/110 [00:11<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  18%|#8        | 20/110 [00:13<01:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  19%|#9        | 21/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  20%|##        | 22/110 [00:15<01:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  21%|##        | 23/110 [00:15<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  22%|##1       | 24/110 [00:16<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  23%|##2       | 25/110 [00:17<01:02,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  24%|##3       | 26/110 [00:18<01:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  25%|##4       | 27/110 [00:18<01:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  25%|##5       | 28/110 [00:19<00:59,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  26%|##6       | 29/110 [00:20<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  27%|##7       | 30/110 [00:21<01:00,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  28%|##8       | 31/110 [00:21<00:58,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  29%|##9       | 32/110 [00:22<00:58,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  30%|###       | 33/110 [00:23<01:00,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  31%|###       | 34/110 [00:24<00:58,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  32%|###1      | 35/110 [00:24<00:56,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  33%|###2      | 36/110 [00:25<00:56,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  34%|###3      | 37/110 [00:26<00:55,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  35%|###4      | 38/110 [00:27<00:54,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  35%|###5      | 39/110 [00:27<00:53,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  36%|###6      | 40/110 [00:28<00:51,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  37%|###7      | 41/110 [00:29<00:52,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  38%|###8      | 42/110 [00:30<00:50,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  39%|###9      | 43/110 [00:30<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  40%|####      | 44/110 [00:31<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  41%|####      | 45/110 [00:32<00:48,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  42%|####1     | 46/110 [00:33<00:49,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  43%|####2     | 47/110 [00:33<00:47,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  44%|####3     | 48/110 [00:34<00:47,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  45%|####4     | 49/110 [00:35<00:44,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  45%|####5     | 50/110 [00:36<00:44,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  46%|####6     | 51/110 [00:36<00:43,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  47%|####7     | 52/110 [00:37<00:42,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  48%|####8     | 53/110 [00:38<00:40,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  50%|#####     | 55/110 [00:39<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  51%|#####     | 56/110 [00:40<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  52%|#####1    | 57/110 [00:40<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  53%|#####2    | 58/110 [00:41<00:40,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  54%|#####3    | 59/110 [00:42<00:39,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  55%|#####4    | 60/110 [00:43<00:36,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  55%|#####5    | 61/110 [00:43<00:34,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  56%|#####6    | 62/110 [00:44<00:33,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  57%|#####7    | 63/110 [00:45<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  58%|#####8    | 64/110 [00:45<00:29,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  59%|#####9    | 65/110 [00:46<00:28,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  60%|######    | 66/110 [00:47<00:27,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  61%|######    | 67/110 [00:47<00:26,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  62%|######1   | 68/110 [00:48<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  63%|######2   | 69/110 [00:49<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  64%|######3   | 70/110 [00:50<00:35,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  65%|######4   | 71/110 [00:51<00:32,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  65%|######5   | 72/110 [00:52<00:31,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  66%|######6   | 73/110 [00:52<00:29,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  67%|######7   | 74/110 [00:53<00:26,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  68%|######8   | 75/110 [00:54<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  69%|######9   | 76/110 [00:54<00:25,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  70%|#######   | 77/110 [00:55<00:24,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  71%|#######   | 78/110 [00:56<00:22,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  72%|#######1  | 79/110 [00:57<00:22,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  73%|#######2  | 80/110 [00:57<00:21,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  74%|#######3  | 81/110 [00:58<00:21,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  75%|#######4  | 82/110 [00:59<00:20,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  75%|#######5  | 83/110 [00:59<00:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  78%|#######8  | 86/110 [01:02<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  80%|########  | 88/110 [01:03<00:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  81%|########  | 89/110 [01:04<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  82%|########1 | 90/110 [01:04<00:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  83%|########2 | 91/110 [01:05<00:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  84%|########3 | 92/110 [01:06<00:13,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  85%|########4 | 93/110 [01:07<00:12,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  85%|########5 | 94/110 [01:07<00:12,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  86%|########6 | 95/110 [01:08<00:11,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  87%|########7 | 96/110 [01:09<00:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  88%|########8 | 97/110 [01:10<00:09,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  89%|########9 | 98/110 [01:10<00:09,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  90%|######### | 99/110 [01:11<00:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  91%|######### | 100/110 [01:12<00:07,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  92%|#########1| 101/110 [01:13<00:06,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  93%|#########2| 102/110 [01:13<00:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  94%|#########3| 103/110 [01:14<00:05,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  95%|#########4| 104/110 [01:15<00:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  95%|#########5| 105/110 [01:16<00:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  96%|#########6| 106/110 [01:16<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  97%|#########7| 107/110 [01:17<00:02,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  98%|#########8| 108/110 [01:18<00:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  99%|#########9| 109/110 [01:18<00:00,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||: 100%|##########| 110/110 [01:19<00:00,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||: 100%|##########| 110/110 [01:19<00:00,  1.39it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2575 ||:   4%|4         | 1/24 [00:00<00:09,  2.31it/s]
+BLEU: 0.7182, acc: 0.6875, no_result: 0.1094, loss: 1.1649 ||:   8%|8         | 2/24 [00:00<00:09,  2.40it/s]
+BLEU: 0.6940, acc: 0.6667, no_result: 0.1354, loss: 1.3019 ||:  12%|#2        | 3/24 [00:01<00:08,  2.40it/s]
+BLEU: 0.7188, acc: 0.6719, no_result: 0.1328, loss: 1.2101 ||:  17%|#6        | 4/24 [00:01<00:08,  2.37it/s]
+BLEU: 0.7424, acc: 0.6937, no_result: 0.1313, loss: 1.1660 ||:  21%|##        | 5/24 [00:02<00:08,  2.34it/s]
+BLEU: 0.7422, acc: 0.6823, no_result: 0.1198, loss: 1.1673 ||:  25%|##5       | 6/24 [00:02<00:07,  2.41it/s]
+BLEU: 0.7512, acc: 0.6920, no_result: 0.1116, loss: 1.1430 ||:  29%|##9       | 7/24 [00:02<00:06,  2.53it/s]
+BLEU: 0.7480, acc: 0.6953, no_result: 0.1094, loss: 1.1646 ||:  33%|###3      | 8/24 [00:03<00:06,  2.55it/s]
+BLEU: 0.7376, acc: 0.7083, no_result: 0.1076, loss: 1.1593 ||:  38%|###7      | 9/24 [00:03<00:06,  2.48it/s]
+BLEU: 0.7447, acc: 0.6906, no_result: 0.1062, loss: 1.1779 ||:  42%|####1     | 10/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.7420, acc: 0.6847, no_result: 0.1080, loss: 1.2289 ||:  46%|####5     | 11/24 [00:04<00:05,  2.32it/s]
+BLEU: 0.7372, acc: 0.6901, no_result: 0.1016, loss: 1.2431 ||:  50%|#####     | 12/24 [00:04<00:05,  2.37it/s]
+BLEU: 0.7360, acc: 0.6875, no_result: 0.1082, loss: 1.2643 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.31it/s]
+BLEU: 0.7355, acc: 0.6786, no_result: 0.1049, loss: 1.3142 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.22it/s]
+BLEU: 0.7276, acc: 0.6562, no_result: 0.1271, loss: 1.3760 ||:  62%|######2   | 15/24 [00:06<00:04,  2.06it/s]
+BLEU: 0.7230, acc: 0.6504, no_result: 0.1289, loss: 1.3994 ||:  67%|######6   | 16/24 [00:07<00:03,  2.03it/s]
+BLEU: 0.7206, acc: 0.6434, no_result: 0.1342, loss: 1.3878 ||:  71%|#######   | 17/24 [00:07<00:03,  2.09it/s]
+BLEU: 0.7176, acc: 0.6389, no_result: 0.1372, loss: 1.3985 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.08it/s]
+BLEU: 0.7203, acc: 0.6447, no_result: 0.1332, loss: 1.3765 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.17it/s]
+BLEU: 0.7229, acc: 0.6484, no_result: 0.1297, loss: 1.3630 ||:  83%|########3 | 20/24 [00:08<00:01,  2.30it/s]
+BLEU: 0.7196, acc: 0.6473, no_result: 0.1250, loss: 1.3667 ||:  88%|########7 | 21/24 [00:09<00:01,  2.31it/s]
+BLEU: 0.7233, acc: 0.6435, no_result: 0.1236, loss: 1.3624 ||:  92%|#########1| 22/24 [00:09<00:00,  2.29it/s]
+BLEU: 0.7258, acc: 0.6495, no_result: 0.1196, loss: 1.3390 ||:  96%|#########5| 23/24 [00:10<00:00,  2.36it/s]
+BLEU: 0.7256, acc: 0.6502, no_result: 0.1146, loss: 1.3603 ||: 100%|##########| 24/24 [00:10<00:00,  2.90it/s]
+BLEU: 0.7256, acc: 0.6502, no_result: 0.1146, loss: 1.3603 ||: 100%|##########| 24/24 [00:10<00:00,  2.36it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:   1%|          | 1/110 [00:00<01:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:   2%|1         | 2/110 [00:01<01:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:   3%|2         | 3/110 [00:01<01:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:   4%|3         | 4/110 [00:02<01:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0276 ||:   5%|5         | 6/110 [00:04<01:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0274 ||:   6%|6         | 7/110 [00:05<01:16,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:   7%|7         | 8/110 [00:05<01:16,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0250 ||:   8%|8         | 9/110 [00:06<01:18,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0243 ||:   9%|9         | 10/110 [00:07<01:15,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  10%|#         | 11/110 [00:08<01:13,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  11%|#         | 12/110 [00:08<01:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:  13%|#2        | 14/110 [00:10<01:10,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  14%|#3        | 15/110 [00:10<01:09,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  15%|#4        | 16/110 [00:11<01:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  15%|#5        | 17/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  16%|#6        | 18/110 [00:12<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  17%|#7        | 19/110 [00:13<01:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  18%|#8        | 20/110 [00:14<01:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  19%|#9        | 21/110 [00:15<01:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  20%|##        | 22/110 [00:15<01:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  21%|##        | 23/110 [00:16<01:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  22%|##1       | 24/110 [00:17<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  23%|##2       | 25/110 [00:17<01:01,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  24%|##3       | 26/110 [00:18<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  25%|##4       | 27/110 [00:19<00:59,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  25%|##5       | 28/110 [00:20<00:58,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  26%|##6       | 29/110 [00:20<00:57,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  27%|##7       | 30/110 [00:21<00:58,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  28%|##8       | 31/110 [00:22<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  29%|##9       | 32/110 [00:23<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  30%|###       | 33/110 [00:23<00:53,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  31%|###       | 34/110 [00:24<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  32%|###1      | 35/110 [00:25<00:54,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  33%|###2      | 36/110 [00:25<00:55,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  34%|###3      | 37/110 [00:26<00:57,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  35%|###4      | 38/110 [00:27<00:57,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  35%|###5      | 39/110 [00:28<00:52,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  36%|###6      | 40/110 [00:28<00:50,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  37%|###7      | 41/110 [00:29<00:50,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  38%|###8      | 42/110 [00:30<00:48,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  39%|###9      | 43/110 [00:31<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  40%|####      | 44/110 [00:31<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  41%|####      | 45/110 [00:32<00:45,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  42%|####1     | 46/110 [00:33<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  43%|####2     | 47/110 [00:33<00:44,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  44%|####3     | 48/110 [00:34<00:48,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  45%|####4     | 49/110 [00:35<00:48,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  45%|####5     | 50/110 [00:36<00:44,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  46%|####6     | 51/110 [00:36<00:42,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  47%|####7     | 52/110 [00:37<00:40,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  48%|####8     | 53/110 [00:38<00:39,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  49%|####9     | 54/110 [00:38<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  50%|#####     | 55/110 [00:39<00:37,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  51%|#####     | 56/110 [00:40<00:37,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  52%|#####1    | 57/110 [00:40<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  53%|#####2    | 58/110 [00:41<00:36,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  54%|#####3    | 59/110 [00:42<00:35,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  55%|#####4    | 60/110 [00:44<00:48,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  55%|#####5    | 61/110 [00:44<00:43,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  56%|#####6    | 62/110 [00:45<00:43,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  57%|#####7    | 63/110 [00:46<00:39,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  58%|#####8    | 64/110 [00:47<00:37,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  59%|#####9    | 65/110 [00:47<00:35,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  60%|######    | 66/110 [00:48<00:33,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  61%|######    | 67/110 [00:49<00:32,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  62%|######1   | 68/110 [00:50<00:33,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  63%|######2   | 69/110 [00:50<00:30,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  64%|######3   | 70/110 [00:51<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  65%|######4   | 71/110 [00:52<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  65%|######5   | 72/110 [00:52<00:27,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  66%|######6   | 73/110 [00:53<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  67%|######7   | 74/110 [00:54<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  68%|######8   | 75/110 [00:54<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  69%|######9   | 76/110 [00:55<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  70%|#######   | 77/110 [00:56<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  71%|#######   | 78/110 [00:56<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  72%|#######1  | 79/110 [00:57<00:20,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  73%|#######2  | 80/110 [00:58<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  74%|#######3  | 81/110 [00:58<00:18,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  75%|#######4  | 82/110 [00:59<00:18,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  75%|#######5  | 83/110 [01:00<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  76%|#######6  | 84/110 [01:00<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  77%|#######7  | 85/110 [01:01<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  78%|#######8  | 86/110 [01:02<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  79%|#######9  | 87/110 [01:02<00:15,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  80%|########  | 88/110 [01:03<00:14,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  81%|########  | 89/110 [01:04<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  82%|########1 | 90/110 [01:04<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  83%|########2 | 91/110 [01:05<00:12,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  84%|########3 | 92/110 [01:06<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  85%|########4 | 93/110 [01:06<00:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  85%|########5 | 94/110 [01:07<00:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  86%|########6 | 95/110 [01:08<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  87%|########7 | 96/110 [01:08<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  88%|########8 | 97/110 [01:09<00:09,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  89%|########9 | 98/110 [01:10<00:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  90%|######### | 99/110 [01:10<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  91%|######### | 100/110 [01:11<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  92%|#########1| 101/110 [01:12<00:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  93%|#########2| 102/110 [01:12<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  94%|#########3| 103/110 [01:13<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  95%|#########4| 104/110 [01:14<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  95%|#########5| 105/110 [01:14<00:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  96%|#########6| 106/110 [01:15<00:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  97%|#########7| 107/110 [01:16<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  98%|#########8| 108/110 [01:16<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  99%|#########9| 109/110 [01:17<00:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||: 100%|##########| 110/110 [01:17<00:00,  1.77it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||: 100%|##########| 110/110 [01:17<00:00,  1.41it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2341 ||:   4%|4         | 1/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.7164, acc: 0.6719, no_result: 0.1094, loss: 1.1581 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.6922, acc: 0.6354, no_result: 0.1562, loss: 1.3028 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.7158, acc: 0.6484, no_result: 0.1484, loss: 1.2101 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7418, acc: 0.6750, no_result: 0.1437, loss: 1.1585 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7493, acc: 0.6615, no_result: 0.1354, loss: 1.1580 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7572, acc: 0.6741, no_result: 0.1250, loss: 1.1360 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7518, acc: 0.6797, no_result: 0.1211, loss: 1.1594 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7406, acc: 0.6910, no_result: 0.1215, loss: 1.1579 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7482, acc: 0.6781, no_result: 0.1125, loss: 1.1765 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7458, acc: 0.6619, no_result: 0.1165, loss: 1.2279 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7397, acc: 0.6693, no_result: 0.1094, loss: 1.2428 ||:  50%|#####     | 12/24 [00:04<00:04,  2.82it/s]
+BLEU: 0.7388, acc: 0.6683, no_result: 0.1154, loss: 1.2657 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.73it/s]
+BLEU: 0.7372, acc: 0.6585, no_result: 0.1161, loss: 1.3134 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.7271, acc: 0.6396, no_result: 0.1375, loss: 1.3754 ||:  62%|######2   | 15/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.7213, acc: 0.6328, no_result: 0.1387, loss: 1.4008 ||:  67%|######6   | 16/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7205, acc: 0.6268, no_result: 0.1434, loss: 1.3883 ||:  71%|#######   | 17/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.7160, acc: 0.6250, no_result: 0.1458, loss: 1.3973 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7178, acc: 0.6299, no_result: 0.1414, loss: 1.3770 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7206, acc: 0.6344, no_result: 0.1375, loss: 1.3637 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7173, acc: 0.6339, no_result: 0.1324, loss: 1.3680 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7214, acc: 0.6321, no_result: 0.1293, loss: 1.3627 ||:  92%|#########1| 22/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.7245, acc: 0.6427, no_result: 0.1236, loss: 1.3386 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.7241, acc: 0.6437, no_result: 0.1185, loss: 1.3589 ||: 100%|##########| 24/24 [00:08<00:00,  3.41it/s]
+BLEU: 0.7241, acc: 0.6437, no_result: 0.1185, loss: 1.3589 ||: 100%|##########| 24/24 [00:08<00:00,  2.77it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0259 ||:   1%|          | 1/110 [00:00<01:28,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:   2%|1         | 2/110 [00:01<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:   3%|2         | 3/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:   4%|3         | 4/110 [00:02<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:   5%|4         | 5/110 [00:03<01:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:   5%|5         | 6/110 [00:03<01:04,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:   6%|6         | 7/110 [00:04<01:03,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:   8%|8         | 9/110 [00:05<01:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:   9%|9         | 10/110 [00:06<01:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  10%|#         | 11/110 [00:07<01:02,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  11%|#         | 12/110 [00:07<01:02,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  12%|#1        | 13/110 [00:08<01:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  13%|#2        | 14/110 [00:09<01:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0234 ||:  14%|#3        | 15/110 [00:09<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  15%|#4        | 16/110 [00:10<01:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0240 ||:  15%|#5        | 17/110 [00:11<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0238 ||:  16%|#6        | 18/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  17%|#7        | 19/110 [00:12<01:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0235 ||:  18%|#8        | 20/110 [00:13<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  19%|#9        | 21/110 [00:13<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0236 ||:  20%|##        | 22/110 [00:14<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  21%|##        | 23/110 [00:15<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0239 ||:  22%|##1       | 24/110 [00:15<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0235 ||:  23%|##2       | 25/110 [00:16<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0235 ||:  24%|##3       | 26/110 [00:17<00:54,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0232 ||:  25%|##4       | 27/110 [00:17<00:52,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  25%|##5       | 28/110 [00:18<00:51,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  26%|##6       | 29/110 [00:19<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  27%|##7       | 30/110 [00:19<00:56,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  28%|##8       | 31/110 [00:20<00:53,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  29%|##9       | 32/110 [00:21<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  30%|###       | 33/110 [00:21<00:52,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  31%|###       | 34/110 [00:22<00:51,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  32%|###1      | 35/110 [00:23<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  33%|###2      | 36/110 [00:23<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  34%|###3      | 37/110 [00:24<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:  35%|###4      | 38/110 [00:25<00:47,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  35%|###5      | 39/110 [00:26<00:49,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  36%|###6      | 40/110 [00:26<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  37%|###7      | 41/110 [00:27<00:49,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  38%|###8      | 42/110 [00:28<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  39%|###9      | 43/110 [00:28<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  40%|####      | 44/110 [00:29<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  41%|####      | 45/110 [00:30<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  42%|####1     | 46/110 [00:30<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  43%|####2     | 47/110 [00:31<00:40,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  44%|####3     | 48/110 [00:32<00:40,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  45%|####4     | 49/110 [00:32<00:38,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  45%|####5     | 50/110 [00:34<00:57,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  46%|####6     | 51/110 [00:35<00:51,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  47%|####7     | 52/110 [00:35<00:47,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  48%|####8     | 53/110 [00:36<00:43,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  49%|####9     | 54/110 [00:36<00:39,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0230 ||:  50%|#####     | 55/110 [00:37<00:37,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  51%|#####     | 56/110 [00:38<00:35,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  52%|#####1    | 57/110 [00:38<00:34,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  53%|#####2    | 58/110 [00:39<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  60%|######    | 66/110 [00:44<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  61%|######    | 67/110 [00:45<00:30,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  62%|######1   | 68/110 [00:46<00:29,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  63%|######2   | 69/110 [00:47<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  64%|######3   | 70/110 [00:47<00:26,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  65%|######4   | 71/110 [00:48<00:27,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  65%|######5   | 72/110 [00:49<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  66%|######6   | 73/110 [00:50<00:28,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  67%|######7   | 74/110 [00:50<00:25,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  68%|######8   | 75/110 [00:51<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  69%|######9   | 76/110 [00:51<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  70%|#######   | 77/110 [00:52<00:21,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  71%|#######   | 78/110 [00:53<00:20,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  72%|#######1  | 79/110 [00:53<00:19,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0229 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  80%|########  | 88/110 [00:59<00:14,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  81%|########  | 89/110 [01:00<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  82%|########1 | 90/110 [01:01<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  83%|########2 | 91/110 [01:01<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  84%|########3 | 92/110 [01:02<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  85%|########4 | 93/110 [01:03<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  85%|########5 | 94/110 [01:03<00:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  86%|########6 | 95/110 [01:04<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  87%|########7 | 96/110 [01:05<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  88%|########8 | 97/110 [01:05<00:08,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  89%|########9 | 98/110 [01:06<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  90%|######### | 99/110 [01:06<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  91%|######### | 100/110 [01:07<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  92%|#########1| 101/110 [01:08<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  93%|#########2| 102/110 [01:09<00:05,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  94%|#########3| 103/110 [01:09<00:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  95%|#########4| 104/110 [01:10<00:03,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  95%|#########5| 105/110 [01:11<00:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  96%|#########6| 106/110 [01:11<00:02,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:  97%|#########7| 107/110 [01:12<00:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0227 ||:  98%|#########8| 108/110 [01:13<00:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:  99%|#########9| 109/110 [01:13<00:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||: 100%|##########| 110/110 [01:14<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6875, no_result: 0.0938, loss: 1.2488 ||:   4%|4         | 1/24 [00:00<00:08,  2.71it/s]
+BLEU: 0.7246, acc: 0.6719, no_result: 0.1094, loss: 1.1542 ||:   8%|8         | 2/24 [00:00<00:07,  2.80it/s]
+BLEU: 0.7071, acc: 0.6354, no_result: 0.1562, loss: 1.2991 ||:  12%|#2        | 3/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.7265, acc: 0.6484, no_result: 0.1484, loss: 1.2098 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7497, acc: 0.6750, no_result: 0.1437, loss: 1.1637 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7515, acc: 0.6615, no_result: 0.1354, loss: 1.1658 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7591, acc: 0.6741, no_result: 0.1250, loss: 1.1464 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7550, acc: 0.6836, no_result: 0.1211, loss: 1.1720 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7437, acc: 0.6840, no_result: 0.1285, loss: 1.1697 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7520, acc: 0.6719, no_result: 0.1187, loss: 1.1872 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7486, acc: 0.6619, no_result: 0.1222, loss: 1.2361 ||:  46%|####5     | 11/24 [00:03<00:04,  2.70it/s]
+BLEU: 0.7418, acc: 0.6693, no_result: 0.1172, loss: 1.2494 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7402, acc: 0.6683, no_result: 0.1226, loss: 1.2720 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7372, acc: 0.6562, no_result: 0.1205, loss: 1.3143 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.7263, acc: 0.6354, no_result: 0.1417, loss: 1.3750 ||:  62%|######2   | 15/24 [00:05<00:03,  2.42it/s]
+BLEU: 0.7199, acc: 0.6289, no_result: 0.1426, loss: 1.4000 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.7194, acc: 0.6250, no_result: 0.1507, loss: 1.3873 ||:  71%|#######   | 17/24 [00:06<00:02,  2.47it/s]
+BLEU: 0.7157, acc: 0.6233, no_result: 0.1528, loss: 1.3975 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7185, acc: 0.6316, no_result: 0.1480, loss: 1.3768 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.54it/s]
+BLEU: 0.7206, acc: 0.6344, no_result: 0.1437, loss: 1.3624 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7173, acc: 0.6324, no_result: 0.1384, loss: 1.3668 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7211, acc: 0.6293, no_result: 0.1364, loss: 1.3620 ||:  92%|#########1| 22/24 [00:08<00:00,  2.66it/s]
+BLEU: 0.7239, acc: 0.6359, no_result: 0.1304, loss: 1.3383 ||:  96%|#########5| 23/24 [00:08<00:00,  2.74it/s]
+BLEU: 0.7241, acc: 0.6325, no_result: 0.1296, loss: 1.3599 ||: 100%|##########| 24/24 [00:08<00:00,  3.39it/s]
+BLEU: 0.7241, acc: 0.6325, no_result: 0.1296, loss: 1.3599 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:   1%|          | 1/110 [00:00<01:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:   2%|1         | 2/110 [00:01<01:20,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:   3%|2         | 3/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   4%|3         | 4/110 [00:02<01:10,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:   5%|4         | 5/110 [00:03<01:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:   5%|5         | 6/110 [00:04<01:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:   7%|7         | 8/110 [00:05<01:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:   8%|8         | 9/110 [00:06<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:   9%|9         | 10/110 [00:06<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  10%|#         | 11/110 [00:07<01:06,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  11%|#         | 12/110 [00:08<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  12%|#1        | 13/110 [00:08<01:01,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  13%|#2        | 14/110 [00:09<01:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  14%|#3        | 15/110 [00:09<01:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  15%|#4        | 16/110 [00:10<01:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  15%|#5        | 17/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  16%|#6        | 18/110 [00:11<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  17%|#7        | 19/110 [00:12<00:59,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  18%|#8        | 20/110 [00:13<00:58,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  19%|#9        | 21/110 [00:13<00:56,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  20%|##        | 22/110 [00:14<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  21%|##        | 23/110 [00:15<00:58,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  22%|##1       | 24/110 [00:15<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  23%|##2       | 25/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  24%|##3       | 26/110 [00:17<00:52,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  25%|##4       | 27/110 [00:17<00:53,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  25%|##5       | 28/110 [00:18<00:52,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  26%|##6       | 29/110 [00:19<00:51,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  27%|##7       | 30/110 [00:19<00:52,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  28%|##8       | 31/110 [00:20<00:50,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  29%|##9       | 32/110 [00:20<00:48,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0224 ||:  30%|###       | 33/110 [00:21<00:48,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  31%|###       | 34/110 [00:22<00:48,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  32%|###1      | 35/110 [00:22<00:48,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  33%|###2      | 36/110 [00:23<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  34%|###3      | 37/110 [00:24<00:46,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  35%|###4      | 38/110 [00:24<00:47,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  35%|###5      | 39/110 [00:25<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  36%|###6      | 40/110 [00:27<01:04,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  37%|###7      | 41/110 [00:27<00:59,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  38%|###8      | 42/110 [00:28<00:54,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  39%|###9      | 43/110 [00:29<00:51,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  40%|####      | 44/110 [00:29<00:48,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  41%|####      | 45/110 [00:30<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  42%|####1     | 46/110 [00:31<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  43%|####2     | 47/110 [00:31<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  44%|####3     | 48/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  45%|####4     | 49/110 [00:33<00:40,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  45%|####5     | 50/110 [00:33<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  46%|####6     | 51/110 [00:34<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  47%|####7     | 52/110 [00:35<00:39,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  48%|####8     | 53/110 [00:35<00:37,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  49%|####9     | 54/110 [00:36<00:36,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  50%|#####     | 55/110 [00:37<00:36,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  51%|#####     | 56/110 [00:37<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  54%|#####3    | 59/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  55%|#####4    | 60/110 [00:40<00:34,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  57%|#####7    | 63/110 [00:42<00:35,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  58%|#####8    | 64/110 [00:43<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  59%|#####9    | 65/110 [00:44<00:35,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  60%|######    | 66/110 [00:45<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  61%|######    | 67/110 [00:45<00:29,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  62%|######1   | 68/110 [00:46<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  63%|######2   | 69/110 [00:46<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  64%|######3   | 70/110 [00:47<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  65%|######4   | 71/110 [00:48<00:26,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  65%|######5   | 72/110 [00:49<00:30,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  66%|######6   | 73/110 [00:50<00:28,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  67%|######7   | 74/110 [00:50<00:26,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  68%|######8   | 75/110 [00:51<00:25,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  69%|######9   | 76/110 [00:52<00:24,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  70%|#######   | 77/110 [00:52<00:23,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  71%|#######   | 78/110 [00:53<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  74%|#######3  | 81/110 [00:55<00:18,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  77%|#######7  | 85/110 [00:58<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  80%|########  | 88/110 [01:00<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  81%|########  | 89/110 [01:00<00:14,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  82%|########1 | 90/110 [01:01<00:13,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  83%|########2 | 91/110 [01:02<00:12,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  84%|########3 | 92/110 [01:02<00:11,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  85%|########4 | 93/110 [01:03<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  85%|########5 | 94/110 [01:04<00:10,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  86%|########6 | 95/110 [01:04<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  87%|########7 | 96/110 [01:05<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  88%|########8 | 97/110 [01:05<00:08,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  89%|########9 | 98/110 [01:06<00:07,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  90%|######### | 99/110 [01:07<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  91%|######### | 100/110 [01:07<00:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  92%|#########1| 101/110 [01:08<00:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  93%|#########2| 102/110 [01:09<00:04,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  94%|#########3| 103/110 [01:09<00:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  95%|#########4| 104/110 [01:10<00:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  95%|#########5| 105/110 [01:11<00:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  96%|#########6| 106/110 [01:12<00:02,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  97%|#########7| 107/110 [01:12<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  98%|#########8| 108/110 [01:13<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  99%|#########9| 109/110 [01:14<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||: 100%|##########| 110/110 [01:14<00:00,  1.78it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7395, acc: 0.7500, no_result: 0.0000, loss: 1.2770 ||:   4%|4         | 1/24 [00:00<00:08,  2.71it/s]
+BLEU: 0.7389, acc: 0.7344, no_result: 0.0469, loss: 1.1860 ||:   8%|8         | 2/24 [00:00<00:07,  2.82it/s]
+BLEU: 0.7054, acc: 0.6979, no_result: 0.0938, loss: 1.3105 ||:  12%|#2        | 3/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.7252, acc: 0.6875, no_result: 0.1094, loss: 1.2219 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7474, acc: 0.7063, no_result: 0.1125, loss: 1.1743 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7495, acc: 0.6875, no_result: 0.1094, loss: 1.1712 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7565, acc: 0.6964, no_result: 0.1027, loss: 1.1497 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7519, acc: 0.6992, no_result: 0.1055, loss: 1.1724 ||:  33%|###3      | 8/24 [00:02<00:05,  2.96it/s]
+BLEU: 0.7424, acc: 0.7118, no_result: 0.1042, loss: 1.1655 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7495, acc: 0.6937, no_result: 0.1000, loss: 1.1849 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7469, acc: 0.6875, no_result: 0.1023, loss: 1.2340 ||:  46%|####5     | 11/24 [00:03<00:04,  2.72it/s]
+BLEU: 0.7409, acc: 0.6901, no_result: 0.1016, loss: 1.2491 ||:  50%|#####     | 12/24 [00:04<00:04,  2.81it/s]
+BLEU: 0.7398, acc: 0.6875, no_result: 0.1082, loss: 1.2719 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7377, acc: 0.6763, no_result: 0.1049, loss: 1.3189 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.7284, acc: 0.6542, no_result: 0.1271, loss: 1.3782 ||:  62%|######2   | 15/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.7241, acc: 0.6504, no_result: 0.1289, loss: 1.4040 ||:  67%|######6   | 16/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.7213, acc: 0.6397, no_result: 0.1397, loss: 1.3922 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7178, acc: 0.6354, no_result: 0.1441, loss: 1.4038 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7205, acc: 0.6431, no_result: 0.1398, loss: 1.3822 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7229, acc: 0.6469, no_result: 0.1359, loss: 1.3679 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7197, acc: 0.6443, no_result: 0.1310, loss: 1.3712 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7235, acc: 0.6378, no_result: 0.1293, loss: 1.3651 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.7264, acc: 0.6440, no_result: 0.1250, loss: 1.3405 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7266, acc: 0.6496, no_result: 0.1198, loss: 1.3644 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.7266, acc: 0.6496, no_result: 0.1198, loss: 1.3644 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0366 ||:   1%|          | 1/110 [00:00<01:24,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0302 ||:   2%|1         | 2/110 [00:01<01:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0273 ||:   3%|2         | 3/110 [00:02<01:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0231 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:   5%|4         | 5/110 [00:03<01:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0233 ||:   5%|5         | 6/110 [00:04<01:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:   6%|6         | 7/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:   7%|7         | 8/110 [00:05<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:   8%|8         | 9/110 [00:06<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:   9%|9         | 10/110 [00:06<01:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  10%|#         | 11/110 [00:07<01:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  11%|#         | 12/110 [00:08<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  12%|#1        | 13/110 [00:08<01:05,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  13%|#2        | 14/110 [00:09<01:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  14%|#3        | 15/110 [00:10<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  15%|#4        | 16/110 [00:11<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  16%|#6        | 18/110 [00:12<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  17%|#7        | 19/110 [00:13<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  18%|#8        | 20/110 [00:13<01:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  19%|#9        | 21/110 [00:14<01:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  20%|##        | 22/110 [00:15<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  21%|##        | 23/110 [00:15<00:56,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  22%|##1       | 24/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  23%|##2       | 25/110 [00:16<00:54,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  24%|##3       | 26/110 [00:17<00:53,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  25%|##4       | 27/110 [00:18<00:52,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  25%|##5       | 28/110 [00:18<00:51,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  26%|##6       | 29/110 [00:19<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  27%|##7       | 30/110 [00:21<01:17,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  28%|##8       | 31/110 [00:21<01:07,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  29%|##9       | 32/110 [00:22<01:01,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  30%|###       | 33/110 [00:23<00:57,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  31%|###       | 34/110 [00:23<00:55,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  32%|###1      | 35/110 [00:24<00:53,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  33%|###2      | 36/110 [00:25<00:51,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  34%|###3      | 37/110 [00:25<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  35%|###4      | 38/110 [00:26<00:46,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  35%|###5      | 39/110 [00:27<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  36%|###6      | 40/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  37%|###7      | 41/110 [00:28<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  38%|###8      | 42/110 [00:28<00:43,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  39%|###9      | 43/110 [00:29<00:43,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  40%|####      | 44/110 [00:30<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  41%|####      | 45/110 [00:31<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  42%|####1     | 46/110 [00:31<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  43%|####2     | 47/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  44%|####3     | 48/110 [00:33<00:43,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  45%|####4     | 49/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  45%|####5     | 50/110 [00:34<00:40,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  46%|####6     | 51/110 [00:35<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  47%|####7     | 52/110 [00:35<00:37,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  48%|####8     | 53/110 [00:36<00:37,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  49%|####9     | 54/110 [00:36<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  50%|#####     | 55/110 [00:37<00:34,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  51%|#####     | 56/110 [00:38<00:34,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  52%|#####1    | 57/110 [00:39<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  55%|#####4    | 60/110 [00:41<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  57%|#####7    | 63/110 [00:42<00:30,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  58%|#####8    | 64/110 [00:43<00:29,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  60%|######    | 66/110 [00:45<00:30,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  61%|######    | 67/110 [00:45<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  62%|######1   | 68/110 [00:46<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  63%|######2   | 69/110 [00:47<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  64%|######3   | 70/110 [00:47<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  65%|######4   | 71/110 [00:48<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  65%|######5   | 72/110 [00:49<00:26,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  66%|######6   | 73/110 [00:49<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  67%|######7   | 74/110 [00:50<00:25,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  68%|######8   | 75/110 [00:51<00:26,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  69%|######9   | 76/110 [00:52<00:25,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  70%|#######   | 77/110 [00:52<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  71%|#######   | 78/110 [00:53<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  72%|#######1  | 79/110 [00:54<00:22,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  80%|########  | 88/110 [01:00<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  81%|########  | 89/110 [01:01<00:16,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  82%|########1 | 90/110 [01:02<00:15,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  83%|########2 | 91/110 [01:02<00:14,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  84%|########3 | 92/110 [01:03<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  85%|########4 | 93/110 [01:04<00:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  85%|########5 | 94/110 [01:04<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  86%|########6 | 95/110 [01:05<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  87%|########7 | 96/110 [01:06<00:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  88%|########8 | 97/110 [01:06<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  89%|########9 | 98/110 [01:07<00:08,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  90%|######### | 99/110 [01:08<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  91%|######### | 100/110 [01:09<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  92%|#########1| 101/110 [01:09<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  93%|#########2| 102/110 [01:10<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  94%|#########3| 103/110 [01:11<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  95%|#########4| 104/110 [01:11<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  95%|#########5| 105/110 [01:12<00:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  96%|#########6| 106/110 [01:13<00:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  97%|#########7| 107/110 [01:13<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  98%|#########8| 108/110 [01:14<00:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  99%|#########9| 109/110 [01:15<00:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||: 100%|##########| 110/110 [01:15<00:00,  1.79it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6875, no_result: 0.1250, loss: 1.2258 ||:   4%|4         | 1/24 [00:00<00:08,  2.61it/s]
+BLEU: 0.7007, acc: 0.7031, no_result: 0.0938, loss: 1.1719 ||:   8%|8         | 2/24 [00:00<00:08,  2.55it/s]
+BLEU: 0.6819, acc: 0.6771, no_result: 0.1250, loss: 1.3066 ||:  12%|#2        | 3/24 [00:01<00:08,  2.49it/s]
+BLEU: 0.7082, acc: 0.6719, no_result: 0.1328, loss: 1.2194 ||:  17%|#6        | 4/24 [00:01<00:08,  2.42it/s]
+BLEU: 0.7344, acc: 0.7000, no_result: 0.1313, loss: 1.1647 ||:  21%|##        | 5/24 [00:02<00:08,  2.37it/s]
+BLEU: 0.7322, acc: 0.6823, no_result: 0.1198, loss: 1.1650 ||:  25%|##5       | 6/24 [00:02<00:07,  2.42it/s]
+BLEU: 0.7425, acc: 0.6920, no_result: 0.1116, loss: 1.1455 ||:  29%|##9       | 7/24 [00:02<00:06,  2.54it/s]
+BLEU: 0.7403, acc: 0.6953, no_result: 0.1094, loss: 1.1733 ||:  33%|###3      | 8/24 [00:03<00:06,  2.56it/s]
+BLEU: 0.7308, acc: 0.7014, no_result: 0.1111, loss: 1.1717 ||:  38%|###7      | 9/24 [00:03<00:06,  2.49it/s]
+BLEU: 0.7387, acc: 0.6875, no_result: 0.1031, loss: 1.1895 ||:  42%|####1     | 10/24 [00:04<00:05,  2.44it/s]
+BLEU: 0.7379, acc: 0.6761, no_result: 0.1080, loss: 1.2355 ||:  46%|####5     | 11/24 [00:04<00:05,  2.33it/s]
+BLEU: 0.7308, acc: 0.6823, no_result: 0.1068, loss: 1.2516 ||:  50%|#####     | 12/24 [00:04<00:05,  2.39it/s]
+BLEU: 0.7315, acc: 0.6803, no_result: 0.1130, loss: 1.2729 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.32it/s]
+BLEU: 0.7295, acc: 0.6674, no_result: 0.1071, loss: 1.3211 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.25it/s]
+BLEU: 0.7201, acc: 0.6438, no_result: 0.1333, loss: 1.3818 ||:  62%|######2   | 15/24 [00:06<00:04,  2.07it/s]
+BLEU: 0.7160, acc: 0.6367, no_result: 0.1348, loss: 1.4059 ||:  67%|######6   | 16/24 [00:06<00:03,  2.04it/s]
+BLEU: 0.7133, acc: 0.6305, no_result: 0.1434, loss: 1.3930 ||:  71%|#######   | 17/24 [00:07<00:03,  2.10it/s]
+BLEU: 0.7113, acc: 0.6267, no_result: 0.1476, loss: 1.4067 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.07it/s]
+BLEU: 0.7142, acc: 0.6349, no_result: 0.1431, loss: 1.3841 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.17it/s]
+BLEU: 0.7168, acc: 0.6406, no_result: 0.1375, loss: 1.3693 ||:  83%|########3 | 20/24 [00:08<00:01,  2.29it/s]
+BLEU: 0.7139, acc: 0.6399, no_result: 0.1324, loss: 1.3748 ||:  88%|########7 | 21/24 [00:09<00:01,  2.29it/s]
+BLEU: 0.7182, acc: 0.6364, no_result: 0.1293, loss: 1.3670 ||:  92%|#########1| 22/24 [00:09<00:00,  2.27it/s]
+BLEU: 0.7208, acc: 0.6427, no_result: 0.1250, loss: 1.3431 ||:  96%|#########5| 23/24 [00:09<00:00,  2.33it/s]
+BLEU: 0.7211, acc: 0.6483, no_result: 0.1198, loss: 1.3652 ||: 100%|##########| 24/24 [00:10<00:00,  2.88it/s]
+BLEU: 0.7211, acc: 0.6483, no_result: 0.1198, loss: 1.3652 ||: 100%|##########| 24/24 [00:10<00:00,  2.37it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0345 ||:   1%|          | 1/110 [00:00<01:33,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0308 ||:   2%|1         | 2/110 [00:01<01:17,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0255 ||:   3%|2         | 3/110 [00:02<01:23,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:   4%|3         | 4/110 [00:03<01:19,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0256 ||:   5%|4         | 5/110 [00:03<01:19,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0261 ||:   5%|5         | 6/110 [00:04<01:18,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:   6%|6         | 7/110 [00:05<01:13,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0248 ||:   7%|7         | 8/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:   8%|8         | 9/110 [00:06<01:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:   9%|9         | 10/110 [00:07<01:10,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0214 ||:  10%|#         | 11/110 [00:07<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  11%|#         | 12/110 [00:08<01:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  12%|#1        | 13/110 [00:09<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  13%|#2        | 14/110 [00:10<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  14%|#3        | 15/110 [00:10<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  15%|#4        | 16/110 [00:11<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  15%|#5        | 17/110 [00:12<01:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  16%|#6        | 18/110 [00:12<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  17%|#7        | 19/110 [00:13<01:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  18%|#8        | 20/110 [00:14<01:22,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  19%|#9        | 21/110 [00:15<01:14,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  20%|##        | 22/110 [00:16<01:11,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  21%|##        | 23/110 [00:17<01:07,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  22%|##1       | 24/110 [00:17<01:04,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  23%|##2       | 25/110 [00:18<01:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  24%|##3       | 26/110 [00:19<01:01,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  25%|##4       | 27/110 [00:19<01:01,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  25%|##5       | 28/110 [00:20<00:58,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  26%|##6       | 29/110 [00:21<00:57,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  27%|##7       | 30/110 [00:21<00:55,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  28%|##8       | 31/110 [00:22<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  29%|##9       | 32/110 [00:23<00:59,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  30%|###       | 33/110 [00:24<00:57,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  31%|###       | 34/110 [00:25<00:56,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  32%|###1      | 35/110 [00:25<00:53,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  33%|###2      | 36/110 [00:26<00:50,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  34%|###3      | 37/110 [00:26<00:49,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  35%|###4      | 38/110 [00:27<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  35%|###5      | 39/110 [00:28<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  36%|###6      | 40/110 [00:29<00:49,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  37%|###7      | 41/110 [00:29<00:45,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  38%|###8      | 42/110 [00:30<00:44,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  39%|###9      | 43/110 [00:30<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  40%|####      | 44/110 [00:31<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  41%|####      | 45/110 [00:32<00:42,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  42%|####1     | 46/110 [00:32<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  43%|####2     | 47/110 [00:33<00:41,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  44%|####3     | 48/110 [00:34<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  45%|####4     | 49/110 [00:35<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  45%|####5     | 50/110 [00:35<00:41,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  46%|####6     | 51/110 [00:36<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  47%|####7     | 52/110 [00:37<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  48%|####8     | 53/110 [00:37<00:37,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  49%|####9     | 54/110 [00:38<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  50%|#####     | 55/110 [00:39<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  51%|#####     | 56/110 [00:39<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  52%|#####1    | 57/110 [00:40<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  53%|#####2    | 58/110 [00:41<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  54%|#####3    | 59/110 [00:41<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  55%|#####4    | 60/110 [00:42<00:32,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  55%|#####5    | 61/110 [00:42<00:31,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  56%|#####6    | 62/110 [00:43<00:31,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  57%|#####7    | 63/110 [00:44<00:30,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  58%|#####8    | 64/110 [00:44<00:30,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  59%|#####9    | 65/110 [00:45<00:30,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  60%|######    | 66/110 [00:46<00:30,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  61%|######    | 67/110 [00:47<00:32,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  62%|######1   | 68/110 [00:48<00:33,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  63%|######2   | 69/110 [00:48<00:32,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  64%|######3   | 70/110 [00:49<00:31,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  65%|######4   | 71/110 [00:50<00:28,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  65%|######5   | 72/110 [00:51<00:27,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  66%|######6   | 73/110 [00:51<00:26,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  67%|######7   | 74/110 [00:52<00:27,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  68%|######8   | 75/110 [00:53<00:26,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  69%|######9   | 76/110 [00:54<00:25,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  70%|#######   | 77/110 [00:54<00:23,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  71%|#######   | 78/110 [00:55<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  73%|#######2  | 80/110 [00:56<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  74%|#######3  | 81/110 [00:57<00:19,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  75%|#######4  | 82/110 [00:57<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  75%|#######5  | 83/110 [00:58<00:17,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  76%|#######6  | 84/110 [00:59<00:16,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  77%|#######7  | 85/110 [00:59<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  78%|#######8  | 86/110 [01:00<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  79%|#######9  | 87/110 [01:01<00:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  80%|########  | 88/110 [01:02<00:15,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  81%|########  | 89/110 [01:02<00:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  82%|########1 | 90/110 [01:03<00:14,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  83%|########2 | 91/110 [01:04<00:13,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  84%|########3 | 92/110 [01:04<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  85%|########4 | 93/110 [01:05<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  85%|########5 | 94/110 [01:06<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  86%|########6 | 95/110 [01:06<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  87%|########7 | 96/110 [01:07<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  88%|########8 | 97/110 [01:08<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  89%|########9 | 98/110 [01:08<00:07,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  90%|######### | 99/110 [01:09<00:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  91%|######### | 100/110 [01:10<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  92%|#########1| 101/110 [01:11<00:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  93%|#########2| 102/110 [01:11<00:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  94%|#########3| 103/110 [01:12<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  95%|#########4| 104/110 [01:12<00:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  95%|#########5| 105/110 [01:13<00:03,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  96%|#########6| 106/110 [01:14<00:02,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  97%|#########7| 107/110 [01:14<00:01,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  98%|#########8| 108/110 [01:15<00:01,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  99%|#########9| 109/110 [01:16<00:00,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||: 100%|##########| 110/110 [01:16<00:00,  1.90it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||: 100%|##########| 110/110 [01:16<00:00,  1.44it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6875, no_result: 0.0938, loss: 1.2284 ||:   4%|4         | 1/24 [00:00<00:08,  2.69it/s]
+BLEU: 0.7109, acc: 0.7031, no_result: 0.0781, loss: 1.1730 ||:   8%|8         | 2/24 [00:00<00:07,  2.80it/s]
+BLEU: 0.6886, acc: 0.6667, no_result: 0.1146, loss: 1.3162 ||:  12%|#2        | 3/24 [00:01<00:07,  2.80it/s]
+BLEU: 0.7131, acc: 0.6641, no_result: 0.1250, loss: 1.2217 ||:  17%|#6        | 4/24 [00:01<00:07,  2.72it/s]
+BLEU: 0.7405, acc: 0.6875, no_result: 0.1250, loss: 1.1686 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7440, acc: 0.6719, no_result: 0.1198, loss: 1.1695 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7527, acc: 0.6830, no_result: 0.1116, loss: 1.1479 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7494, acc: 0.6836, no_result: 0.1133, loss: 1.1768 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7388, acc: 0.6910, no_result: 0.1146, loss: 1.1745 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7463, acc: 0.6750, no_result: 0.1031, loss: 1.1950 ||:  42%|####1     | 10/24 [00:03<00:04,  2.84it/s]
+BLEU: 0.7441, acc: 0.6591, no_result: 0.1080, loss: 1.2451 ||:  46%|####5     | 11/24 [00:03<00:04,  2.72it/s]
+BLEU: 0.7362, acc: 0.6667, no_result: 0.1016, loss: 1.2608 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7346, acc: 0.6659, no_result: 0.1082, loss: 1.2844 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7346, acc: 0.6540, no_result: 0.1049, loss: 1.3372 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.7264, acc: 0.6333, no_result: 0.1271, loss: 1.3969 ||:  62%|######2   | 15/24 [00:05<00:03,  2.42it/s]
+BLEU: 0.7230, acc: 0.6289, no_result: 0.1309, loss: 1.4203 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7214, acc: 0.6232, no_result: 0.1379, loss: 1.4075 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.7183, acc: 0.6233, no_result: 0.1424, loss: 1.4199 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7200, acc: 0.6299, no_result: 0.1382, loss: 1.3970 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7224, acc: 0.6344, no_result: 0.1344, loss: 1.3837 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7193, acc: 0.6310, no_result: 0.1295, loss: 1.3894 ||:  88%|########7 | 21/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7217, acc: 0.6250, no_result: 0.1293, loss: 1.3832 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7244, acc: 0.6332, no_result: 0.1250, loss: 1.3587 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7244, acc: 0.6392, no_result: 0.1198, loss: 1.3815 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7244, acc: 0.6392, no_result: 0.1198, loss: 1.3815 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:   1%|          | 1/110 [00:00<01:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:   2%|1         | 2/110 [00:01<01:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:   3%|2         | 3/110 [00:01<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:   4%|3         | 4/110 [00:02<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:   5%|4         | 5/110 [00:03<01:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:   5%|5         | 6/110 [00:03<01:08,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:   7%|7         | 8/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:   8%|8         | 9/110 [00:05<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:   9%|9         | 10/110 [00:07<01:27,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  10%|#         | 11/110 [00:07<01:19,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  11%|#         | 12/110 [00:08<01:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  12%|#1        | 13/110 [00:09<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  13%|#2        | 14/110 [00:09<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  14%|#3        | 15/110 [00:10<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  15%|#4        | 16/110 [00:11<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  15%|#5        | 17/110 [00:11<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  16%|#6        | 18/110 [00:12<00:59,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  17%|#7        | 19/110 [00:13<00:57,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  18%|#8        | 20/110 [00:13<00:56,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  19%|#9        | 21/110 [00:14<00:56,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  20%|##        | 22/110 [00:14<00:55,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  21%|##        | 23/110 [00:15<00:55,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  22%|##1       | 24/110 [00:16<00:53,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  23%|##2       | 25/110 [00:16<00:54,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  24%|##3       | 26/110 [00:17<00:57,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  25%|##4       | 27/110 [00:18<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  25%|##5       | 28/110 [00:19<01:00,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  26%|##6       | 29/110 [00:19<01:00,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  27%|##7       | 30/110 [00:20<00:58,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  28%|##8       | 31/110 [00:21<00:57,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  29%|##9       | 32/110 [00:21<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  30%|###       | 33/110 [00:22<00:53,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  31%|###       | 34/110 [00:23<00:52,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  32%|###1      | 35/110 [00:24<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  33%|###2      | 36/110 [00:24<00:49,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  34%|###3      | 37/110 [00:25<00:48,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  35%|###4      | 38/110 [00:25<00:47,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  35%|###5      | 39/110 [00:26<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  36%|###6      | 40/110 [00:27<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  37%|###7      | 41/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  38%|###8      | 42/110 [00:28<00:43,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  39%|###9      | 43/110 [00:29<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  40%|####      | 44/110 [00:29<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  41%|####      | 45/110 [00:30<00:42,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  42%|####1     | 46/110 [00:31<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  43%|####2     | 47/110 [00:31<00:40,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  44%|####3     | 48/110 [00:32<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  45%|####4     | 49/110 [00:33<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  45%|####5     | 50/110 [00:33<00:38,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  46%|####6     | 51/110 [00:34<00:37,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  47%|####7     | 52/110 [00:34<00:35,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  48%|####8     | 53/110 [00:35<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  49%|####9     | 54/110 [00:36<00:37,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  50%|#####     | 55/110 [00:37<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  51%|#####     | 56/110 [00:37<00:35,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0225 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  54%|#####3    | 59/110 [00:39<00:33,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0223 ||:  55%|#####4    | 60/110 [00:40<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:  56%|#####6    | 62/110 [00:41<00:34,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0221 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  59%|#####9    | 65/110 [00:43<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  60%|######    | 66/110 [00:44<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0219 ||:  61%|######    | 67/110 [00:45<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0218 ||:  62%|######1   | 68/110 [00:45<00:27,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  63%|######2   | 69/110 [00:46<00:26,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:  64%|######3   | 70/110 [00:47<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  65%|######4   | 71/110 [00:47<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:  65%|######5   | 72/110 [00:48<00:26,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  66%|######6   | 73/110 [00:49<00:26,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  67%|######7   | 74/110 [00:50<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  68%|######8   | 75/110 [00:50<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  69%|######9   | 76/110 [00:51<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  70%|#######   | 77/110 [00:52<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  71%|#######   | 78/110 [00:52<00:21,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  72%|#######1  | 79/110 [00:53<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  75%|#######5  | 83/110 [00:55<00:17,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  76%|#######6  | 84/110 [00:56<00:16,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  80%|########  | 88/110 [00:59<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  81%|########  | 89/110 [01:00<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  82%|########1 | 90/110 [01:00<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  83%|########2 | 91/110 [01:01<00:12,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  84%|########3 | 92/110 [01:01<00:11,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  85%|########4 | 93/110 [01:02<00:10,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  85%|########5 | 94/110 [01:03<00:10,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  86%|########6 | 95/110 [01:03<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  87%|########7 | 96/110 [01:04<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  88%|########8 | 97/110 [01:05<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  89%|########9 | 98/110 [01:05<00:07,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  90%|######### | 99/110 [01:06<00:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  91%|######### | 100/110 [01:07<00:07,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  92%|#########1| 101/110 [01:08<00:06,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  93%|#########2| 102/110 [01:08<00:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  94%|#########3| 103/110 [01:09<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  95%|#########4| 104/110 [01:10<00:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  95%|#########5| 105/110 [01:10<00:03,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  96%|#########6| 106/110 [01:11<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  97%|#########7| 107/110 [01:12<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  98%|#########8| 108/110 [01:13<00:01,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  99%|#########9| 109/110 [01:13<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||: 100%|##########| 110/110 [01:15<00:00,  1.08it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2698 ||:   4%|4         | 1/24 [00:00<00:09,  2.38it/s]
+BLEU: 0.7243, acc: 0.6562, no_result: 0.1250, loss: 1.1937 ||:   8%|8         | 2/24 [00:00<00:08,  2.57it/s]
+BLEU: 0.7003, acc: 0.6458, no_result: 0.1354, loss: 1.3184 ||:  12%|#2        | 3/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7216, acc: 0.6562, no_result: 0.1250, loss: 1.2223 ||:  17%|#6        | 4/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7470, acc: 0.6813, no_result: 0.1313, loss: 1.1710 ||:  21%|##        | 5/24 [00:01<00:07,  2.64it/s]
+BLEU: 0.7494, acc: 0.6615, no_result: 0.1250, loss: 1.1723 ||:  25%|##5       | 6/24 [00:02<00:06,  2.77it/s]
+BLEU: 0.7573, acc: 0.6741, no_result: 0.1161, loss: 1.1514 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7526, acc: 0.6797, no_result: 0.1172, loss: 1.1790 ||:  33%|###3      | 8/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7416, acc: 0.6875, no_result: 0.1181, loss: 1.1755 ||:  38%|###7      | 9/24 [00:03<00:05,  2.88it/s]
+BLEU: 0.7502, acc: 0.6750, no_result: 0.1062, loss: 1.1945 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7481, acc: 0.6619, no_result: 0.1108, loss: 1.2432 ||:  46%|####5     | 11/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7425, acc: 0.6719, no_result: 0.1042, loss: 1.2588 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7413, acc: 0.6707, no_result: 0.1082, loss: 1.2809 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7395, acc: 0.6607, no_result: 0.1027, loss: 1.3284 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.63it/s]
+BLEU: 0.7293, acc: 0.6396, no_result: 0.1250, loss: 1.3883 ||:  62%|######2   | 15/24 [00:05<00:03,  2.43it/s]
+BLEU: 0.7253, acc: 0.6348, no_result: 0.1270, loss: 1.4121 ||:  67%|######6   | 16/24 [00:06<00:03,  2.39it/s]
+BLEU: 0.7224, acc: 0.6305, no_result: 0.1342, loss: 1.3988 ||:  71%|#######   | 17/24 [00:06<00:02,  2.46it/s]
+BLEU: 0.7197, acc: 0.6285, no_result: 0.1389, loss: 1.4105 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7213, acc: 0.6349, no_result: 0.1349, loss: 1.3875 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.53it/s]
+BLEU: 0.7236, acc: 0.6406, no_result: 0.1313, loss: 1.3758 ||:  83%|########3 | 20/24 [00:07<00:01,  2.68it/s]
+BLEU: 0.7213, acc: 0.6414, no_result: 0.1265, loss: 1.3808 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7241, acc: 0.6364, no_result: 0.1236, loss: 1.3767 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7268, acc: 0.6440, no_result: 0.1182, loss: 1.3528 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7270, acc: 0.6496, no_result: 0.1133, loss: 1.3765 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.7270, acc: 0.6496, no_result: 0.1133, loss: 1.3765 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:   1%|          | 1/110 [00:00<01:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0242 ||:   2%|1         | 2/110 [00:01<01:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0280 ||:   3%|2         | 3/110 [00:02<01:18,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:   4%|3         | 4/110 [00:02<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0241 ||:   5%|4         | 5/110 [00:03<01:16,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0228 ||:   5%|5         | 6/110 [00:04<01:15,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:   6%|6         | 7/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:   7%|7         | 8/110 [00:05<01:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:   8%|8         | 9/110 [00:06<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:   9%|9         | 10/110 [00:07<01:18,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  10%|#         | 11/110 [00:08<01:14,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  11%|#         | 12/110 [00:08<01:14,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  12%|#1        | 13/110 [00:09<01:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  13%|#2        | 14/110 [00:10<01:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  14%|#3        | 15/110 [00:11<01:14,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  15%|#4        | 16/110 [00:11<01:12,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  15%|#5        | 17/110 [00:12<01:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  16%|#6        | 18/110 [00:13<01:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  17%|#7        | 19/110 [00:13<01:04,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  18%|#8        | 20/110 [00:14<01:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  19%|#9        | 21/110 [00:15<01:07,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:  20%|##        | 22/110 [00:16<01:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:  21%|##        | 23/110 [00:16<01:04,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  22%|##1       | 24/110 [00:17<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:  23%|##2       | 25/110 [00:18<01:01,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  24%|##3       | 26/110 [00:19<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:  25%|##4       | 27/110 [00:19<00:58,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0212 ||:  25%|##5       | 28/110 [00:20<01:00,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:  26%|##6       | 29/110 [00:21<01:02,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  27%|##7       | 30/110 [00:22<00:59,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  28%|##8       | 31/110 [00:22<00:59,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  29%|##9       | 32/110 [00:23<00:57,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  30%|###       | 33/110 [00:24<00:57,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  31%|###       | 34/110 [00:24<00:53,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  32%|###1      | 35/110 [00:25<00:56,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  33%|###2      | 36/110 [00:26<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  34%|###3      | 37/110 [00:27<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  35%|###4      | 38/110 [00:27<00:51,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  35%|###5      | 39/110 [00:28<00:51,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  36%|###6      | 40/110 [00:29<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  37%|###7      | 41/110 [00:30<00:51,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  38%|###8      | 42/110 [00:30<00:49,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  39%|###9      | 43/110 [00:31<00:47,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  40%|####      | 44/110 [00:32<00:47,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  41%|####      | 45/110 [00:32<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  42%|####1     | 46/110 [00:33<00:47,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  43%|####2     | 47/110 [00:34<00:46,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  44%|####3     | 48/110 [00:35<00:45,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  45%|####4     | 49/110 [00:35<00:43,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  45%|####5     | 50/110 [00:36<00:42,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  46%|####6     | 51/110 [00:37<00:42,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  47%|####7     | 52/110 [00:37<00:40,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  48%|####8     | 53/110 [00:38<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  49%|####9     | 54/110 [00:39<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  50%|#####     | 55/110 [00:39<00:37,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  51%|#####     | 56/110 [00:40<00:41,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  52%|#####1    | 57/110 [00:41<00:40,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  53%|#####2    | 58/110 [00:42<00:40,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  54%|#####3    | 59/110 [00:43<00:37,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  55%|#####4    | 60/110 [00:43<00:36,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  56%|#####6    | 62/110 [00:45<00:36,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  57%|#####7    | 63/110 [00:45<00:33,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  58%|#####8    | 64/110 [00:46<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  59%|#####9    | 65/110 [00:47<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  60%|######    | 66/110 [00:48<00:32,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  61%|######    | 67/110 [00:48<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  62%|######1   | 68/110 [00:49<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  63%|######2   | 69/110 [00:50<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  64%|######3   | 70/110 [00:50<00:28,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  65%|######4   | 71/110 [00:51<00:27,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  65%|######5   | 72/110 [00:52<00:26,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  66%|######6   | 73/110 [00:52<00:25,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  67%|######7   | 74/110 [00:53<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  68%|######8   | 75/110 [00:54<00:23,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  69%|######9   | 76/110 [00:55<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  70%|#######   | 77/110 [00:55<00:23,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  71%|#######   | 78/110 [00:56<00:23,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  72%|#######1  | 79/110 [00:57<00:22,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  73%|#######2  | 80/110 [00:58<00:21,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  74%|#######3  | 81/110 [00:58<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  75%|#######4  | 82/110 [00:59<00:19,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  75%|#######5  | 83/110 [01:00<00:20,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  76%|#######6  | 84/110 [01:01<00:19,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  77%|#######7  | 85/110 [01:01<00:18,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  79%|#######9  | 87/110 [01:03<00:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  80%|########  | 88/110 [01:03<00:16,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  81%|########  | 89/110 [01:04<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  82%|########1 | 90/110 [01:05<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  83%|########2 | 91/110 [01:05<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  84%|########3 | 92/110 [01:06<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  85%|########4 | 93/110 [01:07<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  85%|########5 | 94/110 [01:08<00:11,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  86%|########6 | 95/110 [01:08<00:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  87%|########7 | 96/110 [01:09<00:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  88%|########8 | 97/110 [01:10<00:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  89%|########9 | 98/110 [01:11<00:08,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  90%|######### | 99/110 [01:11<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  91%|######### | 100/110 [01:13<00:09,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  92%|#########1| 101/110 [01:13<00:07,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  93%|#########2| 102/110 [01:14<00:06,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  94%|#########3| 103/110 [01:15<00:05,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  95%|#########4| 104/110 [01:16<00:04,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  95%|#########5| 105/110 [01:16<00:03,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  96%|#########6| 106/110 [01:17<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  97%|#########7| 107/110 [01:18<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  98%|#########8| 108/110 [01:18<00:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  99%|#########9| 109/110 [01:19<00:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||: 100%|##########| 110/110 [01:19<00:00,  1.76it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||: 100%|##########| 110/110 [01:19<00:00,  1.38it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6875, no_result: 0.0625, loss: 1.2585 ||:   4%|4         | 1/24 [00:00<00:09,  2.32it/s]
+BLEU: 0.7140, acc: 0.6719, no_result: 0.0938, loss: 1.1954 ||:   8%|8         | 2/24 [00:00<00:09,  2.42it/s]
+BLEU: 0.6931, acc: 0.6354, no_result: 0.1354, loss: 1.3208 ||:  12%|#2        | 3/24 [00:01<00:08,  2.41it/s]
+BLEU: 0.7164, acc: 0.6484, no_result: 0.1328, loss: 1.2305 ||:  17%|#6        | 4/24 [00:01<00:08,  2.37it/s]
+BLEU: 0.7432, acc: 0.6750, no_result: 0.1313, loss: 1.1740 ||:  21%|##        | 5/24 [00:02<00:08,  2.34it/s]
+BLEU: 0.7420, acc: 0.6562, no_result: 0.1250, loss: 1.1745 ||:  25%|##5       | 6/24 [00:02<00:07,  2.42it/s]
+BLEU: 0.7509, acc: 0.6696, no_result: 0.1161, loss: 1.1517 ||:  29%|##9       | 7/24 [00:02<00:06,  2.54it/s]
+BLEU: 0.7453, acc: 0.6719, no_result: 0.1172, loss: 1.1796 ||:  33%|###3      | 8/24 [00:03<00:06,  2.54it/s]
+BLEU: 0.7352, acc: 0.6840, no_result: 0.1146, loss: 1.1765 ||:  38%|###7      | 9/24 [00:03<00:06,  2.48it/s]
+BLEU: 0.7445, acc: 0.6719, no_result: 0.1031, loss: 1.1958 ||:  42%|####1     | 10/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.7425, acc: 0.6619, no_result: 0.1080, loss: 1.2462 ||:  46%|####5     | 11/24 [00:04<00:05,  2.33it/s]
+BLEU: 0.7362, acc: 0.6719, no_result: 0.1016, loss: 1.2597 ||:  50%|#####     | 12/24 [00:04<00:05,  2.40it/s]
+BLEU: 0.7351, acc: 0.6707, no_result: 0.1082, loss: 1.2856 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.33it/s]
+BLEU: 0.7337, acc: 0.6585, no_result: 0.1027, loss: 1.3333 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.25it/s]
+BLEU: 0.7236, acc: 0.6396, no_result: 0.1229, loss: 1.3938 ||:  62%|######2   | 15/24 [00:06<00:04,  2.07it/s]
+BLEU: 0.7200, acc: 0.6348, no_result: 0.1250, loss: 1.4169 ||:  67%|######6   | 16/24 [00:06<00:03,  2.04it/s]
+BLEU: 0.7175, acc: 0.6287, no_result: 0.1305, loss: 1.4040 ||:  71%|#######   | 17/24 [00:07<00:03,  2.10it/s]
+BLEU: 0.7140, acc: 0.6250, no_result: 0.1354, loss: 1.4159 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.09it/s]
+BLEU: 0.7167, acc: 0.6332, no_result: 0.1316, loss: 1.3926 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.18it/s]
+BLEU: 0.7193, acc: 0.6391, no_result: 0.1281, loss: 1.3805 ||:  83%|########3 | 20/24 [00:08<00:01,  2.30it/s]
+BLEU: 0.7172, acc: 0.6399, no_result: 0.1235, loss: 1.3867 ||:  88%|########7 | 21/24 [00:09<00:01,  2.31it/s]
+BLEU: 0.7222, acc: 0.6378, no_result: 0.1207, loss: 1.3811 ||:  92%|#########1| 22/24 [00:09<00:00,  2.28it/s]
+BLEU: 0.7249, acc: 0.6454, no_result: 0.1155, loss: 1.3575 ||:  96%|#########5| 23/24 [00:09<00:00,  2.36it/s]
+BLEU: 0.7246, acc: 0.6509, no_result: 0.1107, loss: 1.3818 ||: 100%|##########| 24/24 [00:10<00:00,  2.91it/s]
+BLEU: 0.7246, acc: 0.6509, no_result: 0.1107, loss: 1.3818 ||: 100%|##########| 24/24 [00:10<00:00,  2.37it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:   1%|          | 1/110 [00:00<01:23,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0213 ||:   2%|1         | 2/110 [00:01<01:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0220 ||:   3%|2         | 3/110 [00:02<01:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0216 ||:   4%|3         | 4/110 [00:02<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:   5%|4         | 5/110 [00:03<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:   5%|5         | 6/110 [00:04<01:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:   6%|6         | 7/110 [00:04<01:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:   7%|7         | 8/110 [00:05<01:10,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:   8%|8         | 9/110 [00:06<01:15,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:   9%|9         | 10/110 [00:07<01:12,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  10%|#         | 11/110 [00:07<01:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  12%|#1        | 13/110 [00:08<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  13%|#2        | 14/110 [00:09<01:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  14%|#3        | 15/110 [00:10<01:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  15%|#4        | 16/110 [00:10<01:00,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  16%|#6        | 18/110 [00:12<00:59,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  17%|#7        | 19/110 [00:12<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  18%|#8        | 20/110 [00:13<01:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  19%|#9        | 21/110 [00:14<00:59,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  20%|##        | 22/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  21%|##        | 23/110 [00:15<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  22%|##1       | 24/110 [00:16<00:58,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  23%|##2       | 25/110 [00:16<00:55,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  24%|##3       | 26/110 [00:17<00:55,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  25%|##4       | 27/110 [00:18<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  25%|##5       | 28/110 [00:18<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  26%|##6       | 29/110 [00:19<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  27%|##7       | 30/110 [00:20<00:52,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  28%|##8       | 31/110 [00:20<00:51,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  29%|##9       | 32/110 [00:21<00:49,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  30%|###       | 33/110 [00:21<00:47,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  31%|###       | 34/110 [00:22<00:47,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  32%|###1      | 35/110 [00:23<00:52,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  33%|###2      | 36/110 [00:24<00:52,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  34%|###3      | 37/110 [00:24<00:51,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  35%|###4      | 38/110 [00:25<00:49,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  35%|###5      | 39/110 [00:26<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  36%|###6      | 40/110 [00:26<00:46,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  37%|###7      | 41/110 [00:27<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  38%|###8      | 42/110 [00:28<00:47,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  39%|###9      | 43/110 [00:28<00:45,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  40%|####      | 44/110 [00:29<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  41%|####      | 45/110 [00:30<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  42%|####1     | 46/110 [00:30<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  43%|####2     | 47/110 [00:31<00:42,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  44%|####3     | 48/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  45%|####4     | 49/110 [00:33<00:43,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  45%|####5     | 50/110 [00:33<00:40,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  46%|####6     | 51/110 [00:34<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  47%|####7     | 52/110 [00:35<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  48%|####8     | 53/110 [00:35<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  49%|####9     | 54/110 [00:36<00:37,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  50%|#####     | 55/110 [00:37<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  51%|#####     | 56/110 [00:37<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  52%|#####1    | 57/110 [00:38<00:33,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  53%|#####2    | 58/110 [00:38<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  54%|#####3    | 59/110 [00:39<00:32,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  55%|#####4    | 60/110 [00:40<00:31,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  55%|#####5    | 61/110 [00:40<00:32,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  59%|#####9    | 65/110 [00:43<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  60%|######    | 66/110 [00:44<00:29,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  61%|######    | 67/110 [00:44<00:27,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  62%|######1   | 68/110 [00:45<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  63%|######2   | 69/110 [00:46<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  64%|######3   | 70/110 [00:47<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  65%|######4   | 71/110 [00:47<00:26,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  65%|######5   | 72/110 [00:48<00:27,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  66%|######6   | 73/110 [00:49<00:25,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  67%|######7   | 74/110 [00:49<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  68%|######8   | 75/110 [00:50<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  69%|######9   | 76/110 [00:51<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  70%|#######   | 77/110 [00:51<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  71%|#######   | 78/110 [00:52<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  73%|#######2  | 80/110 [00:53<00:18,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  74%|#######3  | 81/110 [00:54<00:17,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  75%|#######4  | 82/110 [00:54<00:17,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  75%|#######5  | 83/110 [00:55<00:16,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  76%|#######6  | 84/110 [00:56<00:16,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  77%|#######7  | 85/110 [00:56<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  78%|#######8  | 86/110 [00:57<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  80%|########  | 88/110 [00:58<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  81%|########  | 89/110 [00:59<00:13,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  82%|########1 | 90/110 [01:01<00:20,  1.04s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  83%|########2 | 91/110 [01:02<00:17,  1.07it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  84%|########3 | 92/110 [01:02<00:15,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  85%|########4 | 93/110 [01:03<00:13,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  85%|########5 | 94/110 [01:04<00:12,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  86%|########6 | 95/110 [01:04<00:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  87%|########7 | 96/110 [01:05<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  88%|########8 | 97/110 [01:06<00:08,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  89%|########9 | 98/110 [01:06<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  90%|######### | 99/110 [01:07<00:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  91%|######### | 100/110 [01:08<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  92%|#########1| 101/110 [01:08<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  93%|#########2| 102/110 [01:09<00:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  94%|#########3| 103/110 [01:10<00:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  95%|#########4| 104/110 [01:10<00:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  95%|#########5| 105/110 [01:11<00:03,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  96%|#########6| 106/110 [01:11<00:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  97%|#########7| 107/110 [01:12<00:01,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  98%|#########8| 108/110 [01:13<00:01,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  99%|#########9| 109/110 [01:13<00:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||: 100%|##########| 110/110 [01:14<00:00,  1.81it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2638 ||:   4%|4         | 1/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.7001, acc: 0.6406, no_result: 0.1250, loss: 1.1993 ||:   8%|8         | 2/24 [00:00<00:07,  2.81it/s]
+BLEU: 0.6792, acc: 0.6146, no_result: 0.1458, loss: 1.3187 ||:  12%|#2        | 3/24 [00:01<00:07,  2.83it/s]
+BLEU: 0.7064, acc: 0.6250, no_result: 0.1484, loss: 1.2271 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7331, acc: 0.6625, no_result: 0.1437, loss: 1.1764 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7325, acc: 0.6406, no_result: 0.1406, loss: 1.1746 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7428, acc: 0.6562, no_result: 0.1295, loss: 1.1514 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7366, acc: 0.6641, no_result: 0.1289, loss: 1.1798 ||:  33%|###3      | 8/24 [00:02<00:05,  2.96it/s]
+BLEU: 0.7275, acc: 0.6771, no_result: 0.1250, loss: 1.1755 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7364, acc: 0.6687, no_result: 0.1125, loss: 1.1958 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7358, acc: 0.6562, no_result: 0.1136, loss: 1.2477 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7316, acc: 0.6667, no_result: 0.1068, loss: 1.2608 ||:  50%|#####     | 12/24 [00:04<00:04,  2.81it/s]
+BLEU: 0.7313, acc: 0.6659, no_result: 0.1130, loss: 1.2857 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7306, acc: 0.6540, no_result: 0.1071, loss: 1.3346 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7211, acc: 0.6333, no_result: 0.1292, loss: 1.3940 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7174, acc: 0.6270, no_result: 0.1309, loss: 1.4192 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7154, acc: 0.6232, no_result: 0.1379, loss: 1.4067 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7113, acc: 0.6198, no_result: 0.1406, loss: 1.4205 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7133, acc: 0.6266, no_result: 0.1365, loss: 1.3981 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7165, acc: 0.6312, no_result: 0.1328, loss: 1.3851 ||:  83%|########3 | 20/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7146, acc: 0.6324, no_result: 0.1280, loss: 1.3898 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7211, acc: 0.6307, no_result: 0.1264, loss: 1.3837 ||:  92%|#########1| 22/24 [00:08<00:00,  2.65it/s]
+BLEU: 0.7236, acc: 0.6386, no_result: 0.1209, loss: 1.3603 ||:  96%|#########5| 23/24 [00:08<00:00,  2.73it/s]
+BLEU: 0.7238, acc: 0.6444, no_result: 0.1159, loss: 1.3830 ||: 100%|##########| 24/24 [00:08<00:00,  3.39it/s]
+BLEU: 0.7238, acc: 0.6444, no_result: 0.1159, loss: 1.3830 ||: 100%|##########| 24/24 [00:08<00:00,  2.76it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0265 ||:   1%|          | 1/110 [00:00<01:33,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0238 ||:   2%|1         | 2/110 [00:01<01:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:   3%|2         | 3/110 [00:02<01:21,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:   4%|3         | 4/110 [00:02<01:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:   5%|4         | 5/110 [00:03<01:16,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:   5%|5         | 6/110 [00:04<01:18,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:   6%|6         | 7/110 [00:05<01:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0211 ||:   7%|7         | 8/110 [00:05<01:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:   8%|8         | 9/110 [00:06<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0208 ||:   9%|9         | 10/110 [00:07<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  10%|#         | 11/110 [00:07<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  11%|#         | 12/110 [00:08<01:05,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  12%|#1        | 13/110 [00:09<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  13%|#2        | 14/110 [00:09<01:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  14%|#3        | 15/110 [00:10<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  15%|#4        | 16/110 [00:11<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  16%|#6        | 18/110 [00:12<01:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  17%|#7        | 19/110 [00:13<01:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  18%|#8        | 20/110 [00:13<00:59,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  19%|#9        | 21/110 [00:14<00:58,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  20%|##        | 22/110 [00:15<00:59,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  21%|##        | 23/110 [00:15<00:57,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  22%|##1       | 24/110 [00:16<00:56,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  23%|##2       | 25/110 [00:17<00:54,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  24%|##3       | 26/110 [00:17<00:55,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  25%|##4       | 27/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  25%|##5       | 28/110 [00:19<00:54,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  26%|##6       | 29/110 [00:19<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  27%|##7       | 30/110 [00:20<00:52,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  28%|##8       | 31/110 [00:20<00:50,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  29%|##9       | 32/110 [00:21<00:49,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  30%|###       | 33/110 [00:22<00:50,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  31%|###       | 34/110 [00:22<00:48,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  32%|###1      | 35/110 [00:23<00:49,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0205 ||:  33%|###2      | 36/110 [00:24<00:47,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0206 ||:  34%|###3      | 37/110 [00:24<00:46,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  35%|###4      | 38/110 [00:25<00:46,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  35%|###5      | 39/110 [00:26<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  36%|###6      | 40/110 [00:26<00:46,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  37%|###7      | 41/110 [00:27<00:47,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  39%|###9      | 43/110 [00:28<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:  40%|####      | 44/110 [00:29<00:43,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  41%|####      | 45/110 [00:30<00:45,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  42%|####1     | 46/110 [00:31<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  43%|####2     | 47/110 [00:31<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0199 ||:  44%|####3     | 48/110 [00:32<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:  45%|####4     | 49/110 [00:33<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  45%|####5     | 50/110 [00:33<00:38,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  46%|####6     | 51/110 [00:34<00:37,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  47%|####7     | 52/110 [00:35<00:41,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  48%|####8     | 53/110 [00:35<00:39,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  49%|####9     | 54/110 [00:36<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  50%|#####     | 55/110 [00:37<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  51%|#####     | 56/110 [00:37<00:35,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  52%|#####1    | 57/110 [00:38<00:36,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  53%|#####2    | 58/110 [00:39<00:36,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  55%|#####4    | 60/110 [00:40<00:35,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  56%|#####6    | 62/110 [00:41<00:30,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  58%|#####8    | 64/110 [00:43<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  59%|#####9    | 65/110 [00:43<00:29,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  60%|######    | 66/110 [00:44<00:29,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  61%|######    | 67/110 [00:45<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  62%|######1   | 68/110 [00:45<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  63%|######2   | 69/110 [00:46<00:26,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  64%|######3   | 70/110 [00:47<00:25,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  65%|######4   | 71/110 [00:47<00:24,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  65%|######5   | 72/110 [00:48<00:24,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  66%|######6   | 73/110 [00:49<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  67%|######7   | 74/110 [00:49<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  68%|######8   | 75/110 [00:50<00:24,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  69%|######9   | 76/110 [00:51<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  70%|#######   | 77/110 [00:51<00:22,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  71%|#######   | 78/110 [00:52<00:21,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0194 ||:  73%|#######2  | 80/110 [00:54<00:26,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  74%|#######3  | 81/110 [00:55<00:24,  1.17it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  75%|#######4  | 82/110 [00:56<00:23,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  75%|#######5  | 83/110 [00:56<00:21,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  77%|#######7  | 85/110 [00:58<00:17,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  80%|########  | 88/110 [00:59<00:13,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  81%|########  | 89/110 [01:00<00:13,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  82%|########1 | 90/110 [01:01<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  83%|########2 | 91/110 [01:01<00:12,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  84%|########3 | 92/110 [01:02<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  85%|########4 | 93/110 [01:03<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  85%|########5 | 94/110 [01:03<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  86%|########6 | 95/110 [01:04<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  87%|########7 | 96/110 [01:05<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  89%|########9 | 98/110 [01:06<00:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  90%|######### | 99/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  91%|######### | 100/110 [01:07<00:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  92%|#########1| 101/110 [01:08<00:05,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  93%|#########2| 102/110 [01:09<00:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  94%|#########3| 103/110 [01:09<00:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  95%|#########4| 104/110 [01:10<00:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  95%|#########5| 105/110 [01:11<00:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  96%|#########6| 106/110 [01:11<00:02,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  97%|#########7| 107/110 [01:12<00:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  98%|#########8| 108/110 [01:13<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  99%|#########9| 109/110 [01:13<00:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||: 100%|##########| 110/110 [01:14<00:00,  1.77it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2711 ||:   4%|4         | 1/24 [00:00<00:09,  2.46it/s]
+BLEU: 0.7167, acc: 0.6406, no_result: 0.1250, loss: 1.1946 ||:   8%|8         | 2/24 [00:00<00:08,  2.69it/s]
+BLEU: 0.6903, acc: 0.6146, no_result: 0.1562, loss: 1.3231 ||:  12%|#2        | 3/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7143, acc: 0.6250, no_result: 0.1562, loss: 1.2358 ||:  17%|#6        | 4/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7393, acc: 0.6562, no_result: 0.1562, loss: 1.1819 ||:  21%|##        | 5/24 [00:01<00:07,  2.66it/s]
+BLEU: 0.7366, acc: 0.6510, no_result: 0.1458, loss: 1.1811 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7463, acc: 0.6652, no_result: 0.1339, loss: 1.1584 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7437, acc: 0.6758, no_result: 0.1289, loss: 1.1873 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7338, acc: 0.6840, no_result: 0.1285, loss: 1.1817 ||:  38%|###7      | 9/24 [00:03<00:05,  2.88it/s]
+BLEU: 0.7421, acc: 0.6750, no_result: 0.1219, loss: 1.2015 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7408, acc: 0.6648, no_result: 0.1222, loss: 1.2521 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7372, acc: 0.6745, no_result: 0.1146, loss: 1.2653 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7355, acc: 0.6731, no_result: 0.1202, loss: 1.2902 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7341, acc: 0.6607, no_result: 0.1138, loss: 1.3382 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7251, acc: 0.6417, no_result: 0.1354, loss: 1.3964 ||:  62%|######2   | 15/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7214, acc: 0.6348, no_result: 0.1367, loss: 1.4191 ||:  67%|######6   | 16/24 [00:06<00:03,  2.38it/s]
+BLEU: 0.7198, acc: 0.6305, no_result: 0.1452, loss: 1.4060 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7164, acc: 0.6267, no_result: 0.1493, loss: 1.4185 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7185, acc: 0.6332, no_result: 0.1447, loss: 1.3964 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7212, acc: 0.6391, no_result: 0.1406, loss: 1.3831 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7178, acc: 0.6399, no_result: 0.1354, loss: 1.3873 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7234, acc: 0.6364, no_result: 0.1349, loss: 1.3811 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7269, acc: 0.6440, no_result: 0.1291, loss: 1.3572 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7268, acc: 0.6496, no_result: 0.1237, loss: 1.3803 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7268, acc: 0.6496, no_result: 0.1237, loss: 1.3803 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0279 ||:   1%|          | 1/110 [00:00<01:35,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:   2%|1         | 2/110 [00:01<01:19,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:   3%|2         | 3/110 [00:02<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:   4%|3         | 4/110 [00:02<01:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:   6%|6         | 7/110 [00:04<01:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:   7%|7         | 8/110 [00:05<01:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:   8%|8         | 9/110 [00:06<01:14,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:   9%|9         | 10/110 [00:07<01:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  10%|#         | 11/110 [00:07<01:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  11%|#         | 12/110 [00:08<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  12%|#1        | 13/110 [00:09<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  13%|#2        | 14/110 [00:09<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  14%|#3        | 15/110 [00:10<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  15%|#4        | 16/110 [00:11<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  15%|#5        | 17/110 [00:11<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  16%|#6        | 18/110 [00:12<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  17%|#7        | 19/110 [00:13<01:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  18%|#8        | 20/110 [00:13<01:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  19%|#9        | 21/110 [00:14<01:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  20%|##        | 22/110 [00:15<01:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  21%|##        | 23/110 [00:16<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  22%|##1       | 24/110 [00:16<00:57,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  23%|##2       | 25/110 [00:17<00:55,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  24%|##3       | 26/110 [00:17<00:53,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  25%|##4       | 27/110 [00:18<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  25%|##5       | 28/110 [00:19<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  26%|##6       | 29/110 [00:19<00:55,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  27%|##7       | 30/110 [00:20<00:54,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  28%|##8       | 31/110 [00:21<00:55,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  29%|##9       | 32/110 [00:22<00:57,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  30%|###       | 33/110 [00:22<00:55,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  31%|###       | 34/110 [00:23<00:53,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  32%|###1      | 35/110 [00:24<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  33%|###2      | 36/110 [00:24<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  34%|###3      | 37/110 [00:25<00:49,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  35%|###4      | 38/110 [00:26<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  35%|###5      | 39/110 [00:26<00:48,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  36%|###6      | 40/110 [00:27<00:50,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  37%|###7      | 41/110 [00:28<00:49,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  38%|###8      | 42/110 [00:29<00:52,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  39%|###9      | 43/110 [00:30<00:50,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  40%|####      | 44/110 [00:30<00:49,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  41%|####      | 45/110 [00:31<00:46,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  42%|####1     | 46/110 [00:32<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  43%|####2     | 47/110 [00:32<00:44,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  44%|####3     | 48/110 [00:33<00:43,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  45%|####4     | 49/110 [00:34<00:42,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  45%|####5     | 50/110 [00:34<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  46%|####6     | 51/110 [00:35<00:41,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  47%|####7     | 52/110 [00:36<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  48%|####8     | 53/110 [00:37<00:40,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  49%|####9     | 54/110 [00:37<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  50%|#####     | 55/110 [00:38<00:40,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  51%|#####     | 56/110 [00:39<00:38,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  52%|#####1    | 57/110 [00:39<00:37,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  53%|#####2    | 58/110 [00:40<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  54%|#####3    | 59/110 [00:41<00:33,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  55%|#####4    | 60/110 [00:41<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  55%|#####5    | 61/110 [00:42<00:35,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  56%|#####6    | 62/110 [00:43<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  57%|#####7    | 63/110 [00:44<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  58%|#####8    | 64/110 [00:44<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  59%|#####9    | 65/110 [00:45<00:31,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  60%|######    | 66/110 [00:46<00:31,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  61%|######    | 67/110 [00:47<00:32,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  62%|######1   | 68/110 [00:47<00:31,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  63%|######2   | 69/110 [00:48<00:29,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  64%|######3   | 70/110 [00:50<00:38,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  65%|######4   | 71/110 [00:50<00:35,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  65%|######5   | 72/110 [00:51<00:32,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  66%|######6   | 73/110 [00:52<00:30,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  67%|######7   | 74/110 [00:52<00:27,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  68%|######8   | 75/110 [00:53<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  69%|######9   | 76/110 [00:54<00:25,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  70%|#######   | 77/110 [00:55<00:25,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  71%|#######   | 78/110 [00:55<00:24,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  72%|#######1  | 79/110 [00:56<00:22,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  73%|#######2  | 80/110 [00:57<00:21,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  74%|#######3  | 81/110 [00:58<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  75%|#######4  | 82/110 [00:58<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  75%|#######5  | 83/110 [00:59<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  76%|#######6  | 84/110 [01:00<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  77%|#######7  | 85/110 [01:00<00:17,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  78%|#######8  | 86/110 [01:01<00:16,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  80%|########  | 88/110 [01:02<00:16,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  81%|########  | 89/110 [01:03<00:15,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  82%|########1 | 90/110 [01:04<00:14,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  83%|########2 | 91/110 [01:05<00:13,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  84%|########3 | 92/110 [01:05<00:12,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  85%|########4 | 93/110 [01:06<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  85%|########5 | 94/110 [01:07<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  86%|########6 | 95/110 [01:07<00:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  87%|########7 | 96/110 [01:08<00:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  88%|########8 | 97/110 [01:09<00:09,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  89%|########9 | 98/110 [01:10<00:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  90%|######### | 99/110 [01:10<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  91%|######### | 100/110 [01:11<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  92%|#########1| 101/110 [01:12<00:06,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  93%|#########2| 102/110 [01:12<00:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  94%|#########3| 103/110 [01:13<00:05,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  95%|#########4| 104/110 [01:14<00:04,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  95%|#########5| 105/110 [01:15<00:03,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  96%|#########6| 106/110 [01:16<00:03,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  97%|#########7| 107/110 [01:16<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  98%|#########8| 108/110 [01:17<00:01,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  99%|#########9| 109/110 [01:18<00:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||: 100%|##########| 110/110 [01:18<00:00,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||: 100%|##########| 110/110 [01:18<00:00,  1.40it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7204, acc: 0.6250, no_result: 0.0938, loss: 1.2708 ||:   4%|4         | 1/24 [00:00<00:09,  2.33it/s]
+BLEU: 0.7225, acc: 0.6562, no_result: 0.0938, loss: 1.1960 ||:   8%|8         | 2/24 [00:00<00:09,  2.42it/s]
+BLEU: 0.7009, acc: 0.6354, no_result: 0.1146, loss: 1.3318 ||:  12%|#2        | 3/24 [00:01<00:08,  2.41it/s]
+BLEU: 0.7219, acc: 0.6484, no_result: 0.1172, loss: 1.2332 ||:  17%|#6        | 4/24 [00:01<00:08,  2.36it/s]
+BLEU: 0.7473, acc: 0.6750, no_result: 0.1187, loss: 1.1814 ||:  21%|##        | 5/24 [00:02<00:08,  2.33it/s]
+BLEU: 0.7496, acc: 0.6562, no_result: 0.1146, loss: 1.1826 ||:  25%|##5       | 6/24 [00:02<00:07,  2.41it/s]
+BLEU: 0.7575, acc: 0.6696, no_result: 0.1071, loss: 1.1613 ||:  29%|##9       | 7/24 [00:02<00:06,  2.53it/s]
+BLEU: 0.7545, acc: 0.6797, no_result: 0.1055, loss: 1.1893 ||:  33%|###3      | 8/24 [00:03<00:06,  2.53it/s]
+BLEU: 0.7426, acc: 0.6910, no_result: 0.1007, loss: 1.1833 ||:  38%|###7      | 9/24 [00:03<00:06,  2.48it/s]
+BLEU: 0.7503, acc: 0.6813, no_result: 0.0969, loss: 1.2036 ||:  42%|####1     | 10/24 [00:04<00:05,  2.43it/s]
+BLEU: 0.7476, acc: 0.6733, no_result: 0.0994, loss: 1.2541 ||:  46%|####5     | 11/24 [00:04<00:05,  2.33it/s]
+BLEU: 0.7425, acc: 0.6797, no_result: 0.0964, loss: 1.2677 ||:  50%|#####     | 12/24 [00:04<00:05,  2.38it/s]
+BLEU: 0.7408, acc: 0.6779, no_result: 0.1034, loss: 1.2922 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.31it/s]
+BLEU: 0.7395, acc: 0.6652, no_result: 0.1004, loss: 1.3407 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.23it/s]
+BLEU: 0.7317, acc: 0.6458, no_result: 0.1229, loss: 1.3995 ||:  62%|######2   | 15/24 [00:06<00:04,  2.06it/s]
+BLEU: 0.7271, acc: 0.6406, no_result: 0.1250, loss: 1.4240 ||:  67%|######6   | 16/24 [00:07<00:03,  2.02it/s]
+BLEU: 0.7253, acc: 0.6360, no_result: 0.1342, loss: 1.4110 ||:  71%|#######   | 17/24 [00:07<00:03,  2.08it/s]
+BLEU: 0.7216, acc: 0.6302, no_result: 0.1389, loss: 1.4271 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.07it/s]
+BLEU: 0.7245, acc: 0.6382, no_result: 0.1349, loss: 1.4048 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.16it/s]
+BLEU: 0.7266, acc: 0.6422, no_result: 0.1297, loss: 1.3927 ||:  83%|########3 | 20/24 [00:08<00:01,  2.29it/s]
+BLEU: 0.7242, acc: 0.6414, no_result: 0.1250, loss: 1.3969 ||:  88%|########7 | 21/24 [00:09<00:01,  2.29it/s]
+BLEU: 0.7294, acc: 0.6378, no_result: 0.1236, loss: 1.3897 ||:  92%|#########1| 22/24 [00:09<00:00,  2.27it/s]
+BLEU: 0.7327, acc: 0.6454, no_result: 0.1182, loss: 1.3657 ||:  96%|#########5| 23/24 [00:10<00:00,  2.35it/s]
+BLEU: 0.7325, acc: 0.6509, no_result: 0.1133, loss: 1.3885 ||: 100%|##########| 24/24 [00:10<00:00,  2.89it/s]
+BLEU: 0.7325, acc: 0.6509, no_result: 0.1133, loss: 1.3885 ||: 100%|##########| 24/24 [00:10<00:00,  2.36it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:   1%|          | 1/110 [00:00<01:19,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0125 ||:   2%|1         | 2/110 [00:01<01:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:   3%|2         | 3/110 [00:02<01:11,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:   4%|3         | 4/110 [00:02<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:   5%|4         | 5/110 [00:03<01:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0237 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0217 ||:   6%|6         | 7/110 [00:04<01:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:   7%|7         | 8/110 [00:05<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0197 ||:   9%|9         | 10/110 [00:06<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  10%|#         | 11/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0204 ||:  11%|#         | 12/110 [00:08<01:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:  12%|#1        | 13/110 [00:08<01:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  13%|#2        | 14/110 [00:09<01:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0200 ||:  14%|#3        | 15/110 [00:09<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0196 ||:  15%|#4        | 16/110 [00:10<01:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  15%|#5        | 17/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  16%|#6        | 18/110 [00:11<00:58,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  17%|#7        | 19/110 [00:12<00:57,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  18%|#8        | 20/110 [00:13<00:59,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  19%|#9        | 21/110 [00:13<00:57,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  20%|##        | 22/110 [00:14<00:56,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  21%|##        | 23/110 [00:15<00:55,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0191 ||:  22%|##1       | 24/110 [00:15<00:54,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0189 ||:  23%|##2       | 25/110 [00:16<00:54,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  24%|##3       | 26/110 [00:17<00:55,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  25%|##4       | 27/110 [00:17<00:55,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  25%|##5       | 28/110 [00:18<00:55,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  26%|##6       | 29/110 [00:19<01:00,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  27%|##7       | 30/110 [00:19<00:56,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  28%|##8       | 31/110 [00:20<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  29%|##9       | 32/110 [00:21<00:53,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  30%|###       | 33/110 [00:21<00:51,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  31%|###       | 34/110 [00:22<00:50,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  32%|###1      | 35/110 [00:23<00:48,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  33%|###2      | 36/110 [00:23<00:47,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  34%|###3      | 37/110 [00:24<00:45,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  35%|###4      | 38/110 [00:24<00:43,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  35%|###5      | 39/110 [00:25<00:43,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  36%|###6      | 40/110 [00:26<00:43,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  37%|###7      | 41/110 [00:26<00:44,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  38%|###8      | 42/110 [00:27<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  39%|###9      | 43/110 [00:28<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  40%|####      | 44/110 [00:29<00:47,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  41%|####      | 45/110 [00:29<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  42%|####1     | 46/110 [00:30<00:46,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  43%|####2     | 47/110 [00:31<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  44%|####3     | 48/110 [00:31<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  45%|####4     | 49/110 [00:32<00:40,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  45%|####5     | 50/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  46%|####6     | 51/110 [00:33<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  47%|####7     | 52/110 [00:34<00:40,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  48%|####8     | 53/110 [00:35<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  49%|####9     | 54/110 [00:35<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  50%|#####     | 55/110 [00:36<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  51%|#####     | 56/110 [00:37<00:34,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  52%|#####1    | 57/110 [00:37<00:34,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  53%|#####2    | 58/110 [00:38<00:35,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  55%|#####4    | 60/110 [00:40<00:47,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  55%|#####5    | 61/110 [00:41<00:42,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  56%|#####6    | 62/110 [00:42<00:37,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  57%|#####7    | 63/110 [00:42<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  59%|#####9    | 65/110 [00:43<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  60%|######    | 66/110 [00:44<00:31,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  61%|######    | 67/110 [00:45<00:29,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  62%|######1   | 68/110 [00:46<00:28,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  63%|######2   | 69/110 [00:46<00:26,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  64%|######3   | 70/110 [00:47<00:26,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  65%|######4   | 71/110 [00:48<00:28,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  65%|######5   | 72/110 [00:48<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  66%|######6   | 73/110 [00:49<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  67%|######7   | 74/110 [00:50<00:25,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  68%|######8   | 75/110 [00:51<00:25,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  69%|######9   | 76/110 [00:51<00:23,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  70%|#######   | 77/110 [00:52<00:21,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  71%|#######   | 78/110 [00:52<00:21,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  74%|#######3  | 81/110 [00:54<00:18,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  76%|#######6  | 84/110 [00:56<00:16,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  77%|#######7  | 85/110 [00:57<00:16,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  78%|#######8  | 86/110 [00:58<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  79%|#######9  | 87/110 [00:58<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  80%|########  | 88/110 [00:59<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  81%|########  | 89/110 [01:00<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  82%|########1 | 90/110 [01:00<00:13,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  83%|########2 | 91/110 [01:01<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  84%|########3 | 92/110 [01:02<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  85%|########4 | 93/110 [01:02<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  85%|########5 | 94/110 [01:03<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  86%|########6 | 95/110 [01:04<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  87%|########7 | 96/110 [01:05<00:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  88%|########8 | 97/110 [01:05<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  89%|########9 | 98/110 [01:06<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  90%|######### | 99/110 [01:07<00:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  91%|######### | 100/110 [01:07<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  92%|#########1| 101/110 [01:08<00:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  93%|#########2| 102/110 [01:09<00:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  94%|#########3| 103/110 [01:10<00:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  95%|#########4| 104/110 [01:10<00:04,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  95%|#########5| 105/110 [01:11<00:03,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  96%|#########6| 106/110 [01:12<00:02,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  97%|#########7| 107/110 [01:12<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  98%|#########8| 108/110 [01:13<00:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  99%|#########9| 109/110 [01:14<00:00,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||: 100%|##########| 110/110 [01:14<00:00,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7204, acc: 0.6250, no_result: 0.0938, loss: 1.2736 ||:   4%|4         | 1/24 [00:00<00:08,  2.63it/s]
+BLEU: 0.7182, acc: 0.6562, no_result: 0.0938, loss: 1.2015 ||:   8%|8         | 2/24 [00:00<00:07,  2.77it/s]
+BLEU: 0.6959, acc: 0.6250, no_result: 0.1250, loss: 1.3317 ||:  12%|#2        | 3/24 [00:01<00:07,  2.77it/s]
+BLEU: 0.7184, acc: 0.6328, no_result: 0.1250, loss: 1.2378 ||:  17%|#6        | 4/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7422, acc: 0.6625, no_result: 0.1250, loss: 1.1852 ||:  21%|##        | 5/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7379, acc: 0.6510, no_result: 0.1198, loss: 1.1869 ||:  25%|##5       | 6/24 [00:02<00:06,  2.74it/s]
+BLEU: 0.7474, acc: 0.6652, no_result: 0.1116, loss: 1.1633 ||:  29%|##9       | 7/24 [00:02<00:05,  2.87it/s]
+BLEU: 0.7448, acc: 0.6797, no_result: 0.1055, loss: 1.1905 ||:  33%|###3      | 8/24 [00:02<00:05,  2.89it/s]
+BLEU: 0.7347, acc: 0.6875, no_result: 0.1076, loss: 1.1845 ||:  38%|###7      | 9/24 [00:03<00:05,  2.82it/s]
+BLEU: 0.7440, acc: 0.6719, no_result: 0.1000, loss: 1.2037 ||:  42%|####1     | 10/24 [00:03<00:05,  2.66it/s]
+BLEU: 0.7425, acc: 0.6619, no_result: 0.0994, loss: 1.2553 ||:  46%|####5     | 11/24 [00:04<00:05,  2.46it/s]
+BLEU: 0.7394, acc: 0.6693, no_result: 0.0938, loss: 1.2697 ||:  50%|#####     | 12/24 [00:04<00:04,  2.49it/s]
+BLEU: 0.7361, acc: 0.6683, no_result: 0.1010, loss: 1.2931 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.37it/s]
+BLEU: 0.7329, acc: 0.6562, no_result: 0.0960, loss: 1.3449 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.26it/s]
+BLEU: 0.7228, acc: 0.6354, no_result: 0.1167, loss: 1.4040 ||:  62%|######2   | 15/24 [00:06<00:04,  2.07it/s]
+BLEU: 0.7211, acc: 0.6289, no_result: 0.1230, loss: 1.4268 ||:  67%|######6   | 16/24 [00:06<00:03,  2.03it/s]
+BLEU: 0.7189, acc: 0.6250, no_result: 0.1287, loss: 1.4116 ||:  71%|#######   | 17/24 [00:07<00:03,  2.09it/s]
+BLEU: 0.7153, acc: 0.6215, no_result: 0.1354, loss: 1.4238 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.07it/s]
+BLEU: 0.7177, acc: 0.6299, no_result: 0.1316, loss: 1.4029 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.16it/s]
+BLEU: 0.7214, acc: 0.6359, no_result: 0.1281, loss: 1.3906 ||:  83%|########3 | 20/24 [00:08<00:01,  2.28it/s]
+BLEU: 0.7184, acc: 0.6354, no_result: 0.1235, loss: 1.3961 ||:  88%|########7 | 21/24 [00:08<00:01,  2.28it/s]
+BLEU: 0.7225, acc: 0.6307, no_result: 0.1222, loss: 1.3899 ||:  92%|#########1| 22/24 [00:09<00:00,  2.25it/s]
+BLEU: 0.7258, acc: 0.6386, no_result: 0.1168, loss: 1.3659 ||:  96%|#########5| 23/24 [00:09<00:00,  2.31it/s]
+BLEU: 0.7257, acc: 0.6398, no_result: 0.1120, loss: 1.3886 ||: 100%|##########| 24/24 [00:09<00:00,  2.85it/s]
+BLEU: 0.7257, acc: 0.6398, no_result: 0.1120, loss: 1.3886 ||: 100%|##########| 24/24 [00:09<00:00,  2.46it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0215 ||:   1%|          | 1/110 [00:00<01:24,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:   2%|1         | 2/110 [00:01<01:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:   3%|2         | 3/110 [00:02<01:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:   4%|3         | 4/110 [00:02<01:16,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:   5%|4         | 5/110 [00:03<01:14,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:   5%|5         | 6/110 [00:04<01:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:   6%|6         | 7/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:   7%|7         | 8/110 [00:05<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:   8%|8         | 9/110 [00:06<01:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:   9%|9         | 10/110 [00:06<01:04,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  10%|#         | 11/110 [00:07<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  11%|#         | 12/110 [00:08<01:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  12%|#1        | 13/110 [00:08<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  13%|#2        | 14/110 [00:09<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  14%|#3        | 15/110 [00:10<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  15%|#4        | 16/110 [00:11<01:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  15%|#5        | 17/110 [00:11<01:05,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:  16%|#6        | 18/110 [00:12<01:06,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:  17%|#7        | 19/110 [00:13<01:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  18%|#8        | 20/110 [00:13<01:01,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  19%|#9        | 21/110 [00:14<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  20%|##        | 22/110 [00:15<01:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  21%|##        | 23/110 [00:15<01:00,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  22%|##1       | 24/110 [00:16<01:02,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  23%|##2       | 25/110 [00:17<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0190 ||:  24%|##3       | 26/110 [00:18<01:00,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  25%|##4       | 27/110 [00:18<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  25%|##5       | 28/110 [00:19<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  26%|##6       | 29/110 [00:20<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  27%|##7       | 30/110 [00:20<00:55,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  28%|##8       | 31/110 [00:21<00:55,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  29%|##9       | 32/110 [00:22<00:55,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  30%|###       | 33/110 [00:23<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  31%|###       | 34/110 [00:23<00:54,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  32%|###1      | 35/110 [00:24<00:56,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  33%|###2      | 36/110 [00:25<00:55,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  34%|###3      | 37/110 [00:26<00:54,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  35%|###4      | 38/110 [00:26<00:53,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  35%|###5      | 39/110 [00:27<00:53,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  36%|###6      | 40/110 [00:28<00:51,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  37%|###7      | 41/110 [00:29<00:50,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  38%|###8      | 42/110 [00:29<00:49,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  39%|###9      | 43/110 [00:30<00:49,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  40%|####      | 44/110 [00:31<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  41%|####      | 45/110 [00:31<00:44,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  42%|####1     | 46/110 [00:32<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  43%|####2     | 47/110 [00:33<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  44%|####3     | 48/110 [00:34<00:45,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  45%|####4     | 49/110 [00:34<00:46,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  45%|####5     | 50/110 [00:36<00:57,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  46%|####6     | 51/110 [00:36<00:51,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  47%|####7     | 52/110 [00:37<00:46,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  48%|####8     | 53/110 [00:38<00:42,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  49%|####9     | 54/110 [00:38<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  50%|#####     | 55/110 [00:39<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  51%|#####     | 56/110 [00:40<00:40,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  52%|#####1    | 57/110 [00:41<00:38,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  53%|#####2    | 58/110 [00:41<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  54%|#####3    | 59/110 [00:42<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  55%|#####4    | 60/110 [00:42<00:33,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  55%|#####5    | 61/110 [00:43<00:31,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  56%|#####6    | 62/110 [00:44<00:31,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  57%|#####7    | 63/110 [00:44<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0178 ||:  58%|#####8    | 64/110 [00:45<00:30,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  59%|#####9    | 65/110 [00:46<00:29,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  60%|######    | 66/110 [00:46<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  61%|######    | 67/110 [00:47<00:30,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  62%|######1   | 68/110 [00:48<00:32,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  63%|######2   | 69/110 [00:49<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  64%|######3   | 70/110 [00:49<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  65%|######4   | 71/110 [00:50<00:26,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  65%|######5   | 72/110 [00:51<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  66%|######6   | 73/110 [00:51<00:24,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  67%|######7   | 74/110 [00:52<00:24,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  68%|######8   | 75/110 [00:53<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  69%|######9   | 76/110 [00:53<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  70%|#######   | 77/110 [00:54<00:22,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  71%|#######   | 78/110 [00:55<00:23,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  72%|#######1  | 79/110 [00:56<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  73%|#######2  | 80/110 [00:56<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  74%|#######3  | 81/110 [00:57<00:19,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  75%|#######4  | 82/110 [00:57<00:18,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  75%|#######5  | 83/110 [00:58<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  76%|#######6  | 84/110 [00:59<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  77%|#######7  | 85/110 [01:00<00:17,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  78%|#######8  | 86/110 [01:00<00:16,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  79%|#######9  | 87/110 [01:01<00:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  80%|########  | 88/110 [01:02<00:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  81%|########  | 89/110 [01:02<00:14,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  82%|########1 | 90/110 [01:03<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  83%|########2 | 91/110 [01:04<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  84%|########3 | 92/110 [01:04<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  85%|########4 | 93/110 [01:05<00:11,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  85%|########5 | 94/110 [01:06<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  86%|########6 | 95/110 [01:06<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  87%|########7 | 96/110 [01:07<00:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  88%|########8 | 97/110 [01:08<00:08,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  89%|########9 | 98/110 [01:09<00:08,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  90%|######### | 99/110 [01:09<00:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  91%|######### | 100/110 [01:10<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:  92%|#########1| 101/110 [01:11<00:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  93%|#########2| 102/110 [01:11<00:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  94%|#########3| 103/110 [01:12<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  95%|#########4| 104/110 [01:13<00:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  95%|#########5| 105/110 [01:13<00:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  96%|#########6| 106/110 [01:14<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  97%|#########7| 107/110 [01:15<00:02,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  98%|#########8| 108/110 [01:16<00:01,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  99%|#########9| 109/110 [01:16<00:00,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||: 100%|##########| 110/110 [01:17<00:00,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||: 100%|##########| 110/110 [01:17<00:00,  1.42it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.0938, loss: 1.2615 ||:   4%|4         | 1/24 [00:00<00:09,  2.31it/s]
+BLEU: 0.7038, acc: 0.6250, no_result: 0.1094, loss: 1.1991 ||:   8%|8         | 2/24 [00:00<00:09,  2.39it/s]
+BLEU: 0.6817, acc: 0.6250, no_result: 0.1250, loss: 1.3335 ||:  12%|#2        | 3/24 [00:01<00:08,  2.38it/s]
+BLEU: 0.7081, acc: 0.6328, no_result: 0.1250, loss: 1.2400 ||:  17%|#6        | 4/24 [00:01<00:08,  2.34it/s]
+BLEU: 0.7343, acc: 0.6625, no_result: 0.1250, loss: 1.1846 ||:  21%|##        | 5/24 [00:02<00:08,  2.31it/s]
+BLEU: 0.7367, acc: 0.6562, no_result: 0.1146, loss: 1.1821 ||:  25%|##5       | 6/24 [00:02<00:07,  2.37it/s]
+BLEU: 0.7464, acc: 0.6696, no_result: 0.1071, loss: 1.1567 ||:  29%|##9       | 7/24 [00:02<00:06,  2.49it/s]
+BLEU: 0.7438, acc: 0.6797, no_result: 0.1055, loss: 1.1862 ||:  33%|###3      | 8/24 [00:03<00:06,  2.51it/s]
+BLEU: 0.7339, acc: 0.6875, no_result: 0.1076, loss: 1.1808 ||:  38%|###7      | 9/24 [00:03<00:06,  2.46it/s]
+BLEU: 0.7439, acc: 0.6750, no_result: 0.1000, loss: 1.2012 ||:  42%|####1     | 10/24 [00:04<00:05,  2.41it/s]
+BLEU: 0.7419, acc: 0.6619, no_result: 0.1023, loss: 1.2527 ||:  46%|####5     | 11/24 [00:04<00:05,  2.31it/s]
+BLEU: 0.7378, acc: 0.6719, no_result: 0.0964, loss: 1.2677 ||:  50%|#####     | 12/24 [00:05<00:05,  2.36it/s]
+BLEU: 0.7379, acc: 0.6707, no_result: 0.1034, loss: 1.2926 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.29it/s]
+BLEU: 0.7355, acc: 0.6562, no_result: 0.0982, loss: 1.3443 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.22it/s]
+BLEU: 0.7264, acc: 0.6375, no_result: 0.1187, loss: 1.4049 ||:  62%|######2   | 15/24 [00:06<00:04,  2.05it/s]
+BLEU: 0.7218, acc: 0.6309, no_result: 0.1211, loss: 1.4286 ||:  67%|######6   | 16/24 [00:07<00:03,  2.02it/s]
+BLEU: 0.7206, acc: 0.6250, no_result: 0.1268, loss: 1.4146 ||:  71%|#######   | 17/24 [00:07<00:03,  2.08it/s]
+BLEU: 0.7165, acc: 0.6215, no_result: 0.1319, loss: 1.4284 ||:  75%|#######5  | 18/24 [00:08<00:02,  2.07it/s]
+BLEU: 0.7179, acc: 0.6283, no_result: 0.1283, loss: 1.4071 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.16it/s]
+BLEU: 0.7204, acc: 0.6344, no_result: 0.1250, loss: 1.3939 ||:  83%|########3 | 20/24 [00:08<00:01,  2.28it/s]
+BLEU: 0.7171, acc: 0.6324, no_result: 0.1205, loss: 1.3998 ||:  88%|########7 | 21/24 [00:09<00:01,  2.29it/s]
+BLEU: 0.7227, acc: 0.6278, no_result: 0.1193, loss: 1.3940 ||:  92%|#########1| 22/24 [00:09<00:00,  2.26it/s]
+BLEU: 0.7262, acc: 0.6345, no_result: 0.1155, loss: 1.3698 ||:  96%|#########5| 23/24 [00:10<00:00,  2.34it/s]
+BLEU: 0.7261, acc: 0.6405, no_result: 0.1107, loss: 1.3931 ||: 100%|##########| 24/24 [00:10<00:00,  2.88it/s]
+BLEU: 0.7261, acc: 0.6405, no_result: 0.1107, loss: 1.3931 ||: 100%|##########| 24/24 [00:10<00:00,  2.34it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:   1%|          | 1/110 [00:01<01:55,  1.06s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:   2%|1         | 2/110 [00:01<01:29,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0226 ||:   3%|2         | 3/110 [00:02<01:26,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0222 ||:   4%|3         | 4/110 [00:03<01:19,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0210 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:   5%|5         | 6/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0201 ||:   6%|6         | 7/110 [00:05<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:   7%|7         | 8/110 [00:05<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:   8%|8         | 9/110 [00:06<01:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0184 ||:   9%|9         | 10/110 [00:07<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0209 ||:  10%|#         | 11/110 [00:07<01:10,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  11%|#         | 12/110 [00:08<01:07,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  12%|#1        | 13/110 [00:09<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0203 ||:  13%|#2        | 14/110 [00:10<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0207 ||:  14%|#3        | 15/110 [00:10<01:06,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  15%|#4        | 16/110 [00:11<01:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0202 ||:  15%|#5        | 17/110 [00:12<01:09,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  16%|#6        | 18/110 [00:13<01:08,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0195 ||:  17%|#7        | 19/110 [00:13<01:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0188 ||:  18%|#8        | 20/110 [00:14<01:09,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  19%|#9        | 21/110 [00:15<01:07,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:  20%|##        | 22/110 [00:16<01:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  21%|##        | 23/110 [00:16<01:05,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0187 ||:  22%|##1       | 24/110 [00:17<01:03,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:  23%|##2       | 25/110 [00:18<01:01,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0185 ||:  24%|##3       | 26/110 [00:18<00:59,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  25%|##4       | 27/110 [00:19<01:01,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0183 ||:  25%|##5       | 28/110 [00:20<01:00,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  26%|##6       | 29/110 [00:21<00:59,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  27%|##7       | 30/110 [00:22<01:03,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  28%|##8       | 31/110 [00:22<01:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  29%|##9       | 32/110 [00:23<00:58,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  30%|###       | 33/110 [00:24<00:58,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  31%|###       | 34/110 [00:25<00:56,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  32%|###1      | 35/110 [00:25<00:54,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  33%|###2      | 36/110 [00:26<00:53,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  34%|###3      | 37/110 [00:27<00:53,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  35%|###4      | 38/110 [00:27<00:53,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  35%|###5      | 39/110 [00:28<00:51,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  36%|###6      | 40/110 [00:30<01:07,  1.04it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  37%|###7      | 41/110 [00:30<01:00,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  38%|###8      | 42/110 [00:31<00:55,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  39%|###9      | 43/110 [00:32<00:55,  1.21it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  40%|####      | 44/110 [00:32<00:50,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  41%|####      | 45/110 [00:33<00:47,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  42%|####1     | 46/110 [00:34<00:46,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  43%|####2     | 47/110 [00:35<00:48,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  44%|####3     | 48/110 [00:35<00:47,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0172 ||:  45%|####4     | 49/110 [00:36<00:45,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  45%|####5     | 50/110 [00:37<00:43,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  46%|####6     | 51/110 [00:38<00:42,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  47%|####7     | 52/110 [00:38<00:42,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  48%|####8     | 53/110 [00:39<00:41,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  49%|####9     | 54/110 [00:40<00:42,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  50%|#####     | 55/110 [00:41<00:41,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  51%|#####     | 56/110 [00:41<00:41,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  52%|#####1    | 57/110 [00:42<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  53%|#####2    | 58/110 [00:43<00:37,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  54%|#####3    | 59/110 [00:43<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  55%|#####4    | 60/110 [00:44<00:36,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  55%|#####5    | 61/110 [00:45<00:34,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  56%|#####6    | 62/110 [00:46<00:34,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  57%|#####7    | 63/110 [00:46<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  58%|#####8    | 64/110 [00:47<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  59%|#####9    | 65/110 [00:48<00:34,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  60%|######    | 66/110 [00:49<00:33,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  61%|######    | 67/110 [00:49<00:32,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  62%|######1   | 68/110 [00:50<00:31,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  63%|######2   | 69/110 [00:51<00:30,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  64%|######3   | 70/110 [00:52<00:29,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  65%|######4   | 71/110 [00:53<00:30,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  65%|######5   | 72/110 [00:53<00:29,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  66%|######6   | 73/110 [00:54<00:27,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  67%|######7   | 74/110 [00:55<00:26,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  68%|######8   | 75/110 [00:55<00:26,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  69%|######9   | 76/110 [00:56<00:25,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  70%|#######   | 77/110 [00:57<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  71%|#######   | 78/110 [00:58<00:23,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  72%|#######1  | 79/110 [00:58<00:23,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  73%|#######2  | 80/110 [00:59<00:23,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  74%|#######3  | 81/110 [01:00<00:21,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  75%|#######4  | 82/110 [01:01<00:21,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  75%|#######5  | 83/110 [01:01<00:19,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  76%|#######6  | 84/110 [01:02<00:18,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  77%|#######7  | 85/110 [01:03<00:17,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  78%|#######8  | 86/110 [01:03<00:16,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  79%|#######9  | 87/110 [01:04<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  80%|########  | 88/110 [01:05<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  81%|########  | 89/110 [01:05<00:14,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  82%|########1 | 90/110 [01:06<00:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  83%|########2 | 91/110 [01:07<00:13,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  84%|########3 | 92/110 [01:08<00:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  85%|########4 | 93/110 [01:08<00:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  85%|########5 | 94/110 [01:09<00:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  86%|########6 | 95/110 [01:10<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  87%|########7 | 96/110 [01:10<00:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  88%|########8 | 97/110 [01:11<00:09,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  89%|########9 | 98/110 [01:12<00:09,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  90%|######### | 99/110 [01:13<00:08,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  91%|######### | 100/110 [01:14<00:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  92%|#########1| 101/110 [01:14<00:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  93%|#########2| 102/110 [01:15<00:05,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  94%|#########3| 103/110 [01:16<00:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  95%|#########4| 104/110 [01:16<00:04,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  95%|#########5| 105/110 [01:17<00:03,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  96%|#########6| 106/110 [01:18<00:03,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  97%|#########7| 107/110 [01:19<00:02,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  98%|#########8| 108/110 [01:20<00:01,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  99%|#########9| 109/110 [01:20<00:00,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||: 100%|##########| 110/110 [01:21<00:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||: 100%|##########| 110/110 [01:21<00:00,  1.35it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7204, acc: 0.5625, no_result: 0.1562, loss: 1.3053 ||:   4%|4         | 1/24 [00:00<00:10,  2.29it/s]
+BLEU: 0.7146, acc: 0.6094, no_result: 0.1406, loss: 1.2143 ||:   8%|8         | 2/24 [00:00<00:09,  2.42it/s]
+BLEU: 0.6935, acc: 0.6042, no_result: 0.1458, loss: 1.3360 ||:  12%|#2        | 3/24 [00:01<00:08,  2.39it/s]
+BLEU: 0.7166, acc: 0.6250, no_result: 0.1406, loss: 1.2388 ||:  17%|#6        | 4/24 [00:01<00:08,  2.35it/s]
+BLEU: 0.7409, acc: 0.6562, no_result: 0.1375, loss: 1.1878 ||:  21%|##        | 5/24 [00:02<00:08,  2.33it/s]
+BLEU: 0.7378, acc: 0.6458, no_result: 0.1302, loss: 1.1832 ||:  25%|##5       | 6/24 [00:02<00:07,  2.41it/s]
+BLEU: 0.7473, acc: 0.6607, no_result: 0.1205, loss: 1.1586 ||:  29%|##9       | 7/24 [00:02<00:06,  2.53it/s]
+BLEU: 0.7439, acc: 0.6758, no_result: 0.1133, loss: 1.1886 ||:  33%|###3      | 8/24 [00:03<00:06,  2.55it/s]
+BLEU: 0.7340, acc: 0.6840, no_result: 0.1146, loss: 1.1825 ||:  38%|###7      | 9/24 [00:03<00:06,  2.47it/s]
+BLEU: 0.7428, acc: 0.6719, no_result: 0.1031, loss: 1.2052 ||:  42%|####1     | 10/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.7409, acc: 0.6648, no_result: 0.1023, loss: 1.2561 ||:  46%|####5     | 11/24 [00:04<00:05,  2.32it/s]
+BLEU: 0.7363, acc: 0.6693, no_result: 0.0990, loss: 1.2698 ||:  50%|#####     | 12/24 [00:04<00:05,  2.38it/s]
+BLEU: 0.7361, acc: 0.6683, no_result: 0.1058, loss: 1.2940 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.31it/s]
+BLEU: 0.7346, acc: 0.6540, no_result: 0.1049, loss: 1.3419 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.23it/s]
+BLEU: 0.7277, acc: 0.6354, no_result: 0.1271, loss: 1.4026 ||:  62%|######2   | 15/24 [00:06<00:04,  2.06it/s]
+BLEU: 0.7238, acc: 0.6309, no_result: 0.1289, loss: 1.4274 ||:  67%|######6   | 16/24 [00:07<00:03,  2.02it/s]
+BLEU: 0.7211, acc: 0.6250, no_result: 0.1342, loss: 1.4149 ||:  71%|#######   | 17/24 [00:07<00:03,  2.09it/s]
+BLEU: 0.7167, acc: 0.6233, no_result: 0.1372, loss: 1.4287 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.08it/s]
+BLEU: 0.7187, acc: 0.6299, no_result: 0.1332, loss: 1.4074 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.17it/s]
+BLEU: 0.7214, acc: 0.6344, no_result: 0.1297, loss: 1.3950 ||:  83%|########3 | 20/24 [00:08<00:01,  2.30it/s]
+BLEU: 0.7175, acc: 0.6339, no_result: 0.1250, loss: 1.4006 ||:  88%|########7 | 21/24 [00:09<00:01,  2.30it/s]
+BLEU: 0.7239, acc: 0.6321, no_result: 0.1236, loss: 1.3945 ||:  92%|#########1| 22/24 [00:09<00:00,  2.27it/s]
+BLEU: 0.7271, acc: 0.6386, no_result: 0.1196, loss: 1.3708 ||:  96%|#########5| 23/24 [00:10<00:00,  2.35it/s]
+BLEU: 0.7269, acc: 0.6444, no_result: 0.1146, loss: 1.3963 ||: 100%|##########| 24/24 [00:10<00:00,  2.88it/s]
+BLEU: 0.7269, acc: 0.6444, no_result: 0.1146, loss: 1.3963 ||: 100%|##########| 24/24 [00:10<00:00,  2.36it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0089 ||:   1%|          | 1/110 [00:00<01:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0104 ||:   2%|1         | 2/110 [00:01<01:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0103 ||:   3%|2         | 3/110 [00:01<01:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:   4%|3         | 4/110 [00:02<01:06,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0193 ||:   5%|4         | 5/110 [00:03<01:05,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:   5%|5         | 6/110 [00:03<01:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:   6%|6         | 7/110 [00:04<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:   7%|7         | 8/110 [00:05<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:   8%|8         | 9/110 [00:05<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:   9%|9         | 10/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  10%|#         | 11/110 [00:07<01:09,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  11%|#         | 12/110 [00:08<01:07,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0181 ||:  12%|#1        | 13/110 [00:08<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  13%|#2        | 14/110 [00:09<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  14%|#3        | 15/110 [00:09<01:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  15%|#4        | 16/110 [00:10<01:02,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  15%|#5        | 17/110 [00:11<01:03,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  16%|#6        | 18/110 [00:12<01:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  17%|#7        | 19/110 [00:12<01:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  18%|#8        | 20/110 [00:13<00:58,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:  19%|#9        | 21/110 [00:13<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  20%|##        | 22/110 [00:14<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0177 ||:  21%|##        | 23/110 [00:15<00:56,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  22%|##1       | 24/110 [00:16<01:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  23%|##2       | 25/110 [00:16<01:00,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0175 ||:  24%|##3       | 26/110 [00:17<00:58,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:  25%|##4       | 27/110 [00:18<00:57,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  25%|##5       | 28/110 [00:18<00:57,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  26%|##6       | 29/110 [00:19<00:53,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  27%|##7       | 30/110 [00:20<01:11,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  28%|##8       | 31/110 [00:21<01:03,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  29%|##9       | 32/110 [00:22<00:59,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  30%|###       | 33/110 [00:22<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  31%|###       | 34/110 [00:23<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  32%|###1      | 35/110 [00:24<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  33%|###2      | 36/110 [00:24<00:51,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  34%|###3      | 37/110 [00:25<00:50,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  35%|###4      | 38/110 [00:26<00:53,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  35%|###5      | 39/110 [00:27<00:51,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  36%|###6      | 40/110 [00:27<00:49,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  37%|###7      | 41/110 [00:28<00:47,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  38%|###8      | 42/110 [00:29<00:48,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  39%|###9      | 43/110 [00:29<00:46,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  40%|####      | 44/110 [00:30<00:47,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  41%|####      | 45/110 [00:31<00:45,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  42%|####1     | 46/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  43%|####2     | 47/110 [00:32<00:41,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  44%|####3     | 48/110 [00:33<00:40,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  45%|####4     | 49/110 [00:33<00:40,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  45%|####5     | 50/110 [00:34<00:39,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  46%|####6     | 51/110 [00:35<00:37,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  47%|####7     | 52/110 [00:35<00:36,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  48%|####8     | 53/110 [00:36<00:35,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  49%|####9     | 54/110 [00:36<00:35,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  50%|#####     | 55/110 [00:37<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  51%|#####     | 56/110 [00:38<00:36,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  52%|#####1    | 57/110 [00:39<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  55%|#####4    | 60/110 [00:41<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  55%|#####5    | 61/110 [00:42<00:36,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  56%|#####6    | 62/110 [00:42<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  57%|#####7    | 63/110 [00:43<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  59%|#####9    | 65/110 [00:44<00:29,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  60%|######    | 66/110 [00:45<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  61%|######    | 67/110 [00:46<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  62%|######1   | 68/110 [00:46<00:28,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  63%|######2   | 69/110 [00:47<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  64%|######3   | 70/110 [00:48<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  65%|######4   | 71/110 [00:48<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  65%|######5   | 72/110 [00:49<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  66%|######6   | 73/110 [00:50<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  67%|######7   | 74/110 [00:50<00:24,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  68%|######8   | 75/110 [00:51<00:25,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  69%|######9   | 76/110 [00:52<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  70%|#######   | 77/110 [00:52<00:21,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  71%|#######   | 78/110 [00:53<00:20,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  72%|#######1  | 79/110 [00:54<00:19,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  73%|#######2  | 80/110 [00:54<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  80%|########  | 88/110 [00:59<00:13,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  81%|########  | 89/110 [01:00<00:13,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  82%|########1 | 90/110 [01:01<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  83%|########2 | 91/110 [01:01<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  84%|########3 | 92/110 [01:02<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  85%|########4 | 93/110 [01:03<00:10,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  85%|########5 | 94/110 [01:03<00:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  86%|########6 | 95/110 [01:04<00:10,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  87%|########7 | 96/110 [01:05<00:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  88%|########8 | 97/110 [01:05<00:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  89%|########9 | 98/110 [01:06<00:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  90%|######### | 99/110 [01:06<00:06,  1.65it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  91%|######### | 100/110 [01:07<00:06,  1.64it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  92%|#########1| 101/110 [01:08<00:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  93%|#########2| 102/110 [01:09<00:05,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  94%|#########3| 103/110 [01:09<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  95%|#########4| 104/110 [01:10<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  95%|#########5| 105/110 [01:11<00:03,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  96%|#########6| 106/110 [01:11<00:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  97%|#########7| 107/110 [01:12<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  98%|#########8| 108/110 [01:13<00:01,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  99%|#########9| 109/110 [01:13<00:00,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||: 100%|##########| 110/110 [01:14<00:00,  1.86it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||: 100%|##########| 110/110 [01:14<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2831 ||:   4%|4         | 1/24 [00:00<00:08,  2.72it/s]
+BLEU: 0.7271, acc: 0.6562, no_result: 0.1250, loss: 1.2079 ||:   8%|8         | 2/24 [00:00<00:07,  2.84it/s]
+BLEU: 0.7040, acc: 0.6354, no_result: 0.1354, loss: 1.3322 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.7242, acc: 0.6484, no_result: 0.1328, loss: 1.2357 ||:  17%|#6        | 4/24 [00:01<00:07,  2.75it/s]
+BLEU: 0.7457, acc: 0.6813, no_result: 0.1250, loss: 1.1828 ||:  21%|##        | 5/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7450, acc: 0.6719, no_result: 0.1198, loss: 1.1808 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7536, acc: 0.6830, no_result: 0.1116, loss: 1.1564 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7494, acc: 0.6875, no_result: 0.1094, loss: 1.1879 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7388, acc: 0.6944, no_result: 0.1111, loss: 1.1836 ||:  38%|###7      | 9/24 [00:03<00:05,  2.89it/s]
+BLEU: 0.7470, acc: 0.6813, no_result: 0.1000, loss: 1.2053 ||:  42%|####1     | 10/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.7447, acc: 0.6733, no_result: 0.1023, loss: 1.2561 ||:  46%|####5     | 11/24 [00:03<00:04,  2.70it/s]
+BLEU: 0.7393, acc: 0.6797, no_result: 0.0990, loss: 1.2698 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7374, acc: 0.6779, no_result: 0.1034, loss: 1.2958 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7363, acc: 0.6652, no_result: 0.1004, loss: 1.3426 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7280, acc: 0.6438, no_result: 0.1208, loss: 1.4044 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7244, acc: 0.6387, no_result: 0.1230, loss: 1.4293 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7216, acc: 0.6324, no_result: 0.1305, loss: 1.4168 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7172, acc: 0.6267, no_result: 0.1354, loss: 1.4305 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7192, acc: 0.6332, no_result: 0.1316, loss: 1.4090 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7225, acc: 0.6375, no_result: 0.1281, loss: 1.3953 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7186, acc: 0.6354, no_result: 0.1235, loss: 1.4015 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7243, acc: 0.6335, no_result: 0.1222, loss: 1.3948 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7267, acc: 0.6413, no_result: 0.1168, loss: 1.3715 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7266, acc: 0.6470, no_result: 0.1120, loss: 1.3971 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7266, acc: 0.6470, no_result: 0.1120, loss: 1.3971 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:   1%|          | 1/110 [00:00<01:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0119 ||:   2%|1         | 2/110 [00:01<01:09,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:   3%|2         | 3/110 [00:02<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:   5%|4         | 5/110 [00:03<01:15,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:   5%|5         | 6/110 [00:04<01:09,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:   6%|6         | 7/110 [00:04<01:06,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:   7%|7         | 8/110 [00:05<01:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:   8%|8         | 9/110 [00:06<01:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:   9%|9         | 10/110 [00:06<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  10%|#         | 11/110 [00:07<01:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  11%|#         | 12/110 [00:08<01:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  12%|#1        | 13/110 [00:08<01:01,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  13%|#2        | 14/110 [00:09<01:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  14%|#3        | 15/110 [00:09<01:00,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  15%|#4        | 16/110 [00:10<01:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  15%|#5        | 17/110 [00:11<00:58,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  16%|#6        | 18/110 [00:11<00:56,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  17%|#7        | 19/110 [00:12<00:55,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  18%|#8        | 20/110 [00:13<01:16,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  19%|#9        | 21/110 [00:14<01:07,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  20%|##        | 22/110 [00:15<01:04,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  21%|##        | 23/110 [00:15<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  22%|##1       | 24/110 [00:16<00:57,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  23%|##2       | 25/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  24%|##3       | 26/110 [00:17<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  25%|##4       | 27/110 [00:18<00:58,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  25%|##5       | 28/110 [00:19<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  26%|##6       | 29/110 [00:19<00:54,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  27%|##7       | 30/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  28%|##8       | 31/110 [00:21<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  29%|##9       | 32/110 [00:21<00:55,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  30%|###       | 33/110 [00:22<00:54,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  31%|###       | 34/110 [00:23<00:52,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  32%|###1      | 35/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  33%|###2      | 36/110 [00:24<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  34%|###3      | 37/110 [00:25<00:49,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  35%|###4      | 38/110 [00:25<00:46,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  35%|###5      | 39/110 [00:26<00:45,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  36%|###6      | 40/110 [00:27<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  37%|###7      | 41/110 [00:27<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  38%|###8      | 42/110 [00:28<00:44,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  39%|###9      | 43/110 [00:29<00:44,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  40%|####      | 44/110 [00:29<00:44,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  41%|####      | 45/110 [00:30<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  42%|####1     | 46/110 [00:31<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  43%|####2     | 47/110 [00:31<00:44,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  44%|####3     | 48/110 [00:32<00:42,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  45%|####4     | 49/110 [00:33<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  45%|####5     | 50/110 [00:33<00:41,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  46%|####6     | 51/110 [00:34<00:39,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  47%|####7     | 52/110 [00:35<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  48%|####8     | 53/110 [00:35<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  49%|####9     | 54/110 [00:36<00:37,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  50%|#####     | 55/110 [00:37<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  51%|#####     | 56/110 [00:37<00:36,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  54%|#####3    | 59/110 [00:39<00:33,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  55%|#####4    | 60/110 [00:40<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  59%|#####9    | 65/110 [00:43<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  60%|######    | 66/110 [00:44<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  61%|######    | 67/110 [00:45<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  62%|######1   | 68/110 [00:46<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  63%|######2   | 69/110 [00:46<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  64%|######3   | 70/110 [00:47<00:28,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  65%|######4   | 71/110 [00:48<00:27,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  65%|######5   | 72/110 [00:48<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  66%|######6   | 73/110 [00:49<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  67%|######7   | 74/110 [00:50<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  68%|######8   | 75/110 [00:50<00:22,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  69%|######9   | 76/110 [00:51<00:21,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  70%|#######   | 77/110 [00:51<00:21,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  71%|#######   | 78/110 [00:52<00:20,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  72%|#######1  | 79/110 [00:53<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  75%|#######5  | 83/110 [00:56<00:20,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  76%|#######6  | 84/110 [00:57<00:20,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  77%|#######7  | 85/110 [00:57<00:19,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  78%|#######8  | 86/110 [00:58<00:18,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  79%|#######9  | 87/110 [00:59<00:18,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  80%|########  | 88/110 [01:00<00:17,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  81%|########  | 89/110 [01:01<00:15,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  82%|########1 | 90/110 [01:01<00:14,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  83%|########2 | 91/110 [01:02<00:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  84%|########3 | 92/110 [01:02<00:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  85%|########4 | 93/110 [01:03<00:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  85%|########5 | 94/110 [01:04<00:11,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  86%|########6 | 95/110 [01:05<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  87%|########7 | 96/110 [01:05<00:09,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  89%|########9 | 98/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  90%|######### | 99/110 [01:07<00:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  91%|######### | 100/110 [01:08<00:07,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  92%|#########1| 101/110 [01:09<00:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  93%|#########2| 102/110 [01:09<00:05,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  94%|#########3| 103/110 [01:10<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  95%|#########4| 104/110 [01:11<00:04,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  95%|#########5| 105/110 [01:11<00:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  96%|#########6| 106/110 [01:12<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  97%|#########7| 107/110 [01:13<00:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  98%|#########8| 108/110 [01:13<00:01,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  99%|#########9| 109/110 [01:14<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||: 100%|##########| 110/110 [01:14<00:00,  1.79it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2863 ||:   4%|4         | 1/24 [00:00<00:09,  2.55it/s]
+BLEU: 0.7072, acc: 0.6719, no_result: 0.1250, loss: 1.2003 ||:   8%|8         | 2/24 [00:00<00:08,  2.72it/s]
+BLEU: 0.6840, acc: 0.6458, no_result: 0.1354, loss: 1.3294 ||:  12%|#2        | 3/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.7097, acc: 0.6484, no_result: 0.1406, loss: 1.2330 ||:  17%|#6        | 4/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7331, acc: 0.6750, no_result: 0.1375, loss: 1.1833 ||:  21%|##        | 5/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7344, acc: 0.6615, no_result: 0.1302, loss: 1.1800 ||:  25%|##5       | 6/24 [00:02<00:06,  2.77it/s]
+BLEU: 0.7444, acc: 0.6741, no_result: 0.1205, loss: 1.1585 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7389, acc: 0.6836, no_result: 0.1172, loss: 1.1890 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7295, acc: 0.6910, no_result: 0.1181, loss: 1.1834 ||:  38%|###7      | 9/24 [00:03<00:05,  2.88it/s]
+BLEU: 0.7389, acc: 0.6781, no_result: 0.1062, loss: 1.2045 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7380, acc: 0.6676, no_result: 0.1080, loss: 1.2564 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7346, acc: 0.6719, no_result: 0.1068, loss: 1.2692 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7336, acc: 0.6707, no_result: 0.1106, loss: 1.2935 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.69it/s]
+BLEU: 0.7328, acc: 0.6585, no_result: 0.1071, loss: 1.3415 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7260, acc: 0.6396, no_result: 0.1271, loss: 1.4024 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7222, acc: 0.6348, no_result: 0.1289, loss: 1.4276 ||:  67%|######6   | 16/24 [00:06<00:03,  2.37it/s]
+BLEU: 0.7199, acc: 0.6287, no_result: 0.1360, loss: 1.4149 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7151, acc: 0.6267, no_result: 0.1389, loss: 1.4278 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7169, acc: 0.6332, no_result: 0.1349, loss: 1.4069 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7192, acc: 0.6375, no_result: 0.1313, loss: 1.3938 ||:  83%|########3 | 20/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7171, acc: 0.6354, no_result: 0.1265, loss: 1.3991 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7218, acc: 0.6321, no_result: 0.1250, loss: 1.3927 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7251, acc: 0.6386, no_result: 0.1209, loss: 1.3689 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7250, acc: 0.6444, no_result: 0.1159, loss: 1.3939 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7250, acc: 0.6444, no_result: 0.1159, loss: 1.3939 ||: 100%|##########| 24/24 [00:08<00:00,  2.74it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:   1%|          | 1/110 [00:00<01:16,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   2%|1         | 2/110 [00:01<01:15,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:   3%|2         | 3/110 [00:02<01:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:   4%|3         | 4/110 [00:02<01:10,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:   5%|4         | 5/110 [00:03<01:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:   5%|5         | 6/110 [00:03<01:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:   6%|6         | 7/110 [00:04<01:05,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:   7%|7         | 8/110 [00:05<01:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:   8%|8         | 9/110 [00:05<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:   9%|9         | 10/110 [00:07<01:30,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  10%|#         | 11/110 [00:07<01:19,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  11%|#         | 12/110 [00:08<01:15,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  12%|#1        | 13/110 [00:09<01:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  13%|#2        | 14/110 [00:10<01:10,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  14%|#3        | 15/110 [00:10<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  15%|#4        | 16/110 [00:11<01:07,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  15%|#5        | 17/110 [00:12<01:04,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  16%|#6        | 18/110 [00:12<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  17%|#7        | 19/110 [00:13<01:01,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  18%|#8        | 20/110 [00:14<01:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  19%|#9        | 21/110 [00:14<01:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  20%|##        | 22/110 [00:15<00:59,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  21%|##        | 23/110 [00:16<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  22%|##1       | 24/110 [00:16<00:58,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  23%|##2       | 25/110 [00:17<00:56,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  24%|##3       | 26/110 [00:18<00:57,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  25%|##4       | 27/110 [00:18<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  25%|##5       | 28/110 [00:19<00:55,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  26%|##6       | 29/110 [00:20<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  27%|##7       | 30/110 [00:20<00:50,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  28%|##8       | 31/110 [00:21<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  29%|##9       | 32/110 [00:22<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  30%|###       | 33/110 [00:22<00:52,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  31%|###       | 34/110 [00:23<00:51,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  32%|###1      | 35/110 [00:24<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  33%|###2      | 36/110 [00:24<00:48,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  34%|###3      | 37/110 [00:25<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  35%|###4      | 38/110 [00:26<00:48,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  35%|###5      | 39/110 [00:26<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  36%|###6      | 40/110 [00:27<00:50,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  37%|###7      | 41/110 [00:28<00:51,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  38%|###8      | 42/110 [00:29<00:49,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  39%|###9      | 43/110 [00:29<00:46,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  40%|####      | 44/110 [00:30<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  41%|####      | 45/110 [00:31<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  42%|####1     | 46/110 [00:31<00:43,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  43%|####2     | 47/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  44%|####3     | 48/110 [00:33<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  45%|####4     | 49/110 [00:33<00:39,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  45%|####5     | 50/110 [00:34<00:37,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  46%|####6     | 51/110 [00:35<00:37,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  47%|####7     | 52/110 [00:35<00:36,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  48%|####8     | 53/110 [00:36<00:36,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  49%|####9     | 54/110 [00:37<00:38,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  50%|#####     | 55/110 [00:37<00:38,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  51%|#####     | 56/110 [00:38<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  52%|#####1    | 57/110 [00:39<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  53%|#####2    | 58/110 [00:39<00:37,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  54%|#####3    | 59/110 [00:40<00:34,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  55%|#####4    | 60/110 [00:41<00:36,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  55%|#####5    | 61/110 [00:42<00:37,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  56%|#####6    | 62/110 [00:42<00:35,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  57%|#####7    | 63/110 [00:43<00:32,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  58%|#####8    | 64/110 [00:44<00:33,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  59%|#####9    | 65/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  60%|######    | 66/110 [00:45<00:30,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  61%|######    | 67/110 [00:46<00:28,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  62%|######1   | 68/110 [00:46<00:27,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  63%|######2   | 69/110 [00:47<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  64%|######3   | 70/110 [00:48<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  65%|######4   | 71/110 [00:48<00:25,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  65%|######5   | 72/110 [00:49<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  66%|######6   | 73/110 [00:50<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  67%|######7   | 74/110 [00:50<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  68%|######8   | 75/110 [00:51<00:23,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  69%|######9   | 76/110 [00:52<00:24,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  70%|#######   | 77/110 [00:53<00:22,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  71%|#######   | 78/110 [00:53<00:21,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  73%|#######2  | 80/110 [00:55<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  75%|#######5  | 83/110 [00:57<00:18,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  77%|#######7  | 85/110 [00:58<00:15,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  78%|#######8  | 86/110 [00:59<00:15,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  80%|########  | 88/110 [01:00<00:13,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  81%|########  | 89/110 [01:00<00:13,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  82%|########1 | 90/110 [01:01<00:13,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  83%|########2 | 91/110 [01:02<00:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  84%|########3 | 92/110 [01:02<00:11,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  85%|########4 | 93/110 [01:03<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  85%|########5 | 94/110 [01:04<00:10,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  86%|########6 | 95/110 [01:04<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  87%|########7 | 96/110 [01:05<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  88%|########8 | 97/110 [01:06<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  89%|########9 | 98/110 [01:06<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  90%|######### | 99/110 [01:07<00:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  91%|######### | 100/110 [01:08<00:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  92%|#########1| 101/110 [01:08<00:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  93%|#########2| 102/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  94%|#########3| 103/110 [01:10<00:04,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  95%|#########4| 104/110 [01:10<00:03,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  95%|#########5| 105/110 [01:11<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  96%|#########6| 106/110 [01:12<00:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  97%|#########7| 107/110 [01:12<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  98%|#########8| 108/110 [01:13<00:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  99%|#########9| 109/110 [01:14<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||: 100%|##########| 110/110 [01:15<00:00,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2644 ||:   4%|4         | 1/24 [00:00<00:08,  2.73it/s]
+BLEU: 0.7072, acc: 0.6406, no_result: 0.1094, loss: 1.1959 ||:   8%|8         | 2/24 [00:00<00:07,  2.83it/s]
+BLEU: 0.6840, acc: 0.6146, no_result: 0.1354, loss: 1.3293 ||:  12%|#2        | 3/24 [00:01<00:07,  2.84it/s]
+BLEU: 0.7097, acc: 0.6250, no_result: 0.1406, loss: 1.2346 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7341, acc: 0.6562, no_result: 0.1375, loss: 1.1851 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7342, acc: 0.6406, no_result: 0.1302, loss: 1.1858 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7442, acc: 0.6562, no_result: 0.1205, loss: 1.1644 ||:  29%|##9       | 7/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7388, acc: 0.6680, no_result: 0.1133, loss: 1.1949 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7294, acc: 0.6736, no_result: 0.1181, loss: 1.1913 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7394, acc: 0.6594, no_result: 0.1094, loss: 1.2126 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.7379, acc: 0.6477, no_result: 0.1136, loss: 1.2650 ||:  46%|####5     | 11/24 [00:03<00:04,  2.73it/s]
+BLEU: 0.7331, acc: 0.6562, no_result: 0.1094, loss: 1.2775 ||:  50%|#####     | 12/24 [00:04<00:04,  2.81it/s]
+BLEU: 0.7322, acc: 0.6562, no_result: 0.1130, loss: 1.3028 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7301, acc: 0.6429, no_result: 0.1116, loss: 1.3518 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.62it/s]
+BLEU: 0.7219, acc: 0.6229, no_result: 0.1313, loss: 1.4121 ||:  62%|######2   | 15/24 [00:05<00:03,  2.42it/s]
+BLEU: 0.7203, acc: 0.6211, no_result: 0.1328, loss: 1.4370 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7178, acc: 0.6140, no_result: 0.1415, loss: 1.4238 ||:  71%|#######   | 17/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7135, acc: 0.6146, no_result: 0.1424, loss: 1.4363 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7151, acc: 0.6217, no_result: 0.1382, loss: 1.4151 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.51it/s]
+BLEU: 0.7174, acc: 0.6266, no_result: 0.1344, loss: 1.4010 ||:  83%|########3 | 20/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7154, acc: 0.6250, no_result: 0.1295, loss: 1.4063 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7202, acc: 0.6207, no_result: 0.1278, loss: 1.4007 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7228, acc: 0.6291, no_result: 0.1223, loss: 1.3769 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7227, acc: 0.6353, no_result: 0.1172, loss: 1.4013 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7227, acc: 0.6353, no_result: 0.1172, loss: 1.4013 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   1%|          | 1/110 [00:00<01:34,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:   2%|1         | 2/110 [00:01<01:20,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:   3%|2         | 3/110 [00:02<01:17,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:   4%|3         | 4/110 [00:02<01:14,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:   5%|4         | 5/110 [00:03<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:   5%|5         | 6/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:   6%|6         | 7/110 [00:04<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:   7%|7         | 8/110 [00:05<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:   8%|8         | 9/110 [00:06<01:14,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:   9%|9         | 10/110 [00:07<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  10%|#         | 11/110 [00:07<01:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  11%|#         | 12/110 [00:08<01:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  12%|#1        | 13/110 [00:08<01:04,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  13%|#2        | 14/110 [00:09<01:01,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  14%|#3        | 15/110 [00:10<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  15%|#4        | 16/110 [00:10<01:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  15%|#5        | 17/110 [00:11<01:03,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  16%|#6        | 18/110 [00:12<01:00,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  17%|#7        | 19/110 [00:12<00:57,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  18%|#8        | 20/110 [00:13<00:57,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  19%|#9        | 21/110 [00:14<01:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0131 ||:  20%|##        | 22/110 [00:14<00:59,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0129 ||:  21%|##        | 23/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  22%|##1       | 24/110 [00:16<00:57,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  23%|##2       | 25/110 [00:16<00:56,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  24%|##3       | 26/110 [00:17<00:57,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  25%|##4       | 27/110 [00:18<00:56,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  25%|##5       | 28/110 [00:18<00:54,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  26%|##6       | 29/110 [00:19<00:53,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  27%|##7       | 30/110 [00:20<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  28%|##8       | 31/110 [00:20<00:52,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  29%|##9       | 32/110 [00:21<00:50,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  30%|###       | 33/110 [00:22<00:48,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  31%|###       | 34/110 [00:22<00:48,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  32%|###1      | 35/110 [00:23<00:48,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  33%|###2      | 36/110 [00:24<00:49,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  34%|###3      | 37/110 [00:24<00:46,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  35%|###4      | 38/110 [00:25<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  35%|###5      | 39/110 [00:26<00:47,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  36%|###6      | 40/110 [00:26<00:46,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  37%|###7      | 41/110 [00:27<00:46,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  39%|###9      | 43/110 [00:28<00:46,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  40%|####      | 44/110 [00:29<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  41%|####      | 45/110 [00:30<00:43,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  42%|####1     | 46/110 [00:30<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  43%|####2     | 47/110 [00:31<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  44%|####3     | 48/110 [00:32<00:42,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  45%|####4     | 49/110 [00:32<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  45%|####5     | 50/110 [00:33<00:40,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  46%|####6     | 51/110 [00:34<00:38,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  47%|####7     | 52/110 [00:34<00:38,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  48%|####8     | 53/110 [00:35<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  49%|####9     | 54/110 [00:36<00:35,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  50%|#####     | 55/110 [00:36<00:37,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  51%|#####     | 56/110 [00:37<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  52%|#####1    | 57/110 [00:38<00:34,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  53%|#####2    | 58/110 [00:38<00:34,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  54%|#####3    | 59/110 [00:39<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  55%|#####4    | 60/110 [00:40<00:31,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  55%|#####5    | 61/110 [00:40<00:32,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  56%|#####6    | 62/110 [00:41<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  58%|#####8    | 64/110 [00:42<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  59%|#####9    | 65/110 [00:43<00:31,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  60%|######    | 66/110 [00:44<00:29,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  61%|######    | 67/110 [00:44<00:28,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  62%|######1   | 68/110 [00:45<00:27,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  63%|######2   | 69/110 [00:46<00:27,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  64%|######3   | 70/110 [00:46<00:26,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  65%|######4   | 71/110 [00:47<00:25,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  65%|######5   | 72/110 [00:48<00:24,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  66%|######6   | 73/110 [00:48<00:23,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  67%|######7   | 74/110 [00:49<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  68%|######8   | 75/110 [00:50<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  69%|######9   | 76/110 [00:51<00:25,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  70%|#######   | 77/110 [00:51<00:23,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  71%|#######   | 78/110 [00:52<00:22,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  72%|#######1  | 79/110 [00:53<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  73%|#######2  | 80/110 [00:53<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  75%|#######4  | 82/110 [00:55<00:19,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  75%|#######5  | 83/110 [00:55<00:18,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  77%|#######7  | 85/110 [00:57<00:17,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  78%|#######8  | 86/110 [00:57<00:16,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  79%|#######9  | 87/110 [00:58<00:15,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  80%|########  | 88/110 [00:59<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  81%|########  | 89/110 [00:59<00:13,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  82%|########1 | 90/110 [01:00<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  83%|########2 | 91/110 [01:01<00:12,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  84%|########3 | 92/110 [01:01<00:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  85%|########4 | 93/110 [01:02<00:10,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  85%|########5 | 94/110 [01:03<00:10,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  86%|########6 | 95/110 [01:03<00:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  87%|########7 | 96/110 [01:04<00:08,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  88%|########8 | 97/110 [01:05<00:08,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  89%|########9 | 98/110 [01:05<00:07,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  90%|######### | 99/110 [01:06<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  91%|######### | 100/110 [01:07<00:08,  1.13it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  92%|#########1| 101/110 [01:08<00:07,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  93%|#########2| 102/110 [01:09<00:06,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  94%|#########3| 103/110 [01:09<00:05,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  95%|#########4| 104/110 [01:10<00:04,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  95%|#########5| 105/110 [01:11<00:03,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  96%|#########6| 106/110 [01:11<00:02,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  97%|#########7| 107/110 [01:12<00:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  98%|#########8| 108/110 [01:13<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  99%|#########9| 109/110 [01:13<00:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||: 100%|##########| 110/110 [01:14<00:00,  1.73it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2832 ||:   4%|4         | 1/24 [00:00<00:08,  2.71it/s]
+BLEU: 0.7045, acc: 0.6719, no_result: 0.1250, loss: 1.2100 ||:   8%|8         | 2/24 [00:00<00:07,  2.81it/s]
+BLEU: 0.6821, acc: 0.6458, no_result: 0.1354, loss: 1.3413 ||:  12%|#2        | 3/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.7083, acc: 0.6562, no_result: 0.1328, loss: 1.2426 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7345, acc: 0.6813, no_result: 0.1313, loss: 1.1895 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7347, acc: 0.6615, no_result: 0.1250, loss: 1.1895 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7447, acc: 0.6741, no_result: 0.1161, loss: 1.1700 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7447, acc: 0.6797, no_result: 0.1133, loss: 1.1983 ||:  33%|###3      | 8/24 [00:02<00:05,  2.97it/s]
+BLEU: 0.7346, acc: 0.6875, no_result: 0.1146, loss: 1.1926 ||:  38%|###7      | 9/24 [00:03<00:05,  2.90it/s]
+BLEU: 0.7434, acc: 0.6750, no_result: 0.1062, loss: 1.2128 ||:  42%|####1     | 10/24 [00:03<00:04,  2.85it/s]
+BLEU: 0.7420, acc: 0.6619, no_result: 0.1108, loss: 1.2654 ||:  46%|####5     | 11/24 [00:03<00:04,  2.72it/s]
+BLEU: 0.7368, acc: 0.6667, no_result: 0.1094, loss: 1.2800 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7356, acc: 0.6659, no_result: 0.1130, loss: 1.3050 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.70it/s]
+BLEU: 0.7346, acc: 0.6562, no_result: 0.1094, loss: 1.3508 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7264, acc: 0.6375, no_result: 0.1292, loss: 1.4126 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7238, acc: 0.6309, no_result: 0.1309, loss: 1.4372 ||:  67%|######6   | 16/24 [00:06<00:03,  2.37it/s]
+BLEU: 0.7214, acc: 0.6250, no_result: 0.1379, loss: 1.4244 ||:  71%|#######   | 17/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7173, acc: 0.6198, no_result: 0.1441, loss: 1.4377 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7191, acc: 0.6266, no_result: 0.1398, loss: 1.4163 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7215, acc: 0.6312, no_result: 0.1359, loss: 1.4026 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7193, acc: 0.6295, no_result: 0.1310, loss: 1.4082 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7247, acc: 0.6264, no_result: 0.1293, loss: 1.4014 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7273, acc: 0.6359, no_result: 0.1236, loss: 1.3777 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7272, acc: 0.6418, no_result: 0.1185, loss: 1.4017 ||: 100%|##########| 24/24 [00:08<00:00,  3.37it/s]
+BLEU: 0.7272, acc: 0.6418, no_result: 0.1185, loss: 1.4017 ||: 100%|##########| 24/24 [00:08<00:00,  2.74it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0128 ||:   1%|          | 1/110 [00:00<01:10,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   2%|1         | 2/110 [00:01<01:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:   3%|2         | 3/110 [00:01<01:09,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:   4%|3         | 4/110 [00:02<01:17,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:   5%|4         | 5/110 [00:03<01:12,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:   5%|5         | 6/110 [00:04<01:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:   6%|6         | 7/110 [00:04<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:   7%|7         | 8/110 [00:05<01:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:   8%|8         | 9/110 [00:06<01:05,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:   9%|9         | 10/110 [00:06<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  10%|#         | 11/110 [00:07<01:05,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  11%|#         | 12/110 [00:08<01:04,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  12%|#1        | 13/110 [00:08<01:04,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  13%|#2        | 14/110 [00:09<01:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  14%|#3        | 15/110 [00:09<01:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  15%|#4        | 16/110 [00:10<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  16%|#6        | 18/110 [00:11<01:00,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  17%|#7        | 19/110 [00:12<00:58,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  18%|#8        | 20/110 [00:13<00:57,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  19%|#9        | 21/110 [00:13<00:56,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  20%|##        | 22/110 [00:14<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  21%|##        | 23/110 [00:15<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  22%|##1       | 24/110 [00:15<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  23%|##2       | 25/110 [00:16<00:55,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  24%|##3       | 26/110 [00:17<00:54,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  25%|##4       | 27/110 [00:17<00:55,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  25%|##5       | 28/110 [00:18<00:56,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  26%|##6       | 29/110 [00:19<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  27%|##7       | 30/110 [00:19<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  28%|##8       | 31/110 [00:20<00:53,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  29%|##9       | 32/110 [00:21<00:53,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  30%|###       | 33/110 [00:22<00:56,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  31%|###       | 34/110 [00:22<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  32%|###1      | 35/110 [00:23<00:52,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  33%|###2      | 36/110 [00:24<00:50,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  34%|###3      | 37/110 [00:24<00:48,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  35%|###4      | 38/110 [00:25<00:47,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  35%|###5      | 39/110 [00:26<00:45,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  36%|###6      | 40/110 [00:26<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  37%|###7      | 41/110 [00:27<00:46,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  38%|###8      | 42/110 [00:28<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  39%|###9      | 43/110 [00:28<00:44,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  40%|####      | 44/110 [00:29<00:43,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  41%|####      | 45/110 [00:30<00:44,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  42%|####1     | 46/110 [00:30<00:44,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  43%|####2     | 47/110 [00:31<00:42,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  44%|####3     | 48/110 [00:32<00:41,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  45%|####4     | 49/110 [00:32<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  45%|####5     | 50/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  46%|####6     | 51/110 [00:34<00:40,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  47%|####7     | 52/110 [00:34<00:37,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  48%|####8     | 53/110 [00:35<00:40,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  49%|####9     | 54/110 [00:36<00:38,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  50%|#####     | 55/110 [00:36<00:38,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  51%|#####     | 56/110 [00:37<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  52%|#####1    | 57/110 [00:38<00:35,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  53%|#####2    | 58/110 [00:38<00:34,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:  55%|#####4    | 60/110 [00:40<00:34,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  55%|#####5    | 61/110 [00:40<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  57%|#####7    | 63/110 [00:42<00:31,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  59%|#####9    | 65/110 [00:43<00:32,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  60%|######    | 66/110 [00:44<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  61%|######    | 67/110 [00:45<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  62%|######1   | 68/110 [00:45<00:28,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  63%|######2   | 69/110 [00:46<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  64%|######3   | 70/110 [00:47<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  65%|######4   | 71/110 [00:47<00:24,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  65%|######5   | 72/110 [00:48<00:24,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  66%|######6   | 73/110 [00:49<00:23,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  67%|######7   | 74/110 [00:49<00:23,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  68%|######8   | 75/110 [00:50<00:22,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  69%|######9   | 76/110 [00:50<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  70%|#######   | 77/110 [00:51<00:21,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  71%|#######   | 78/110 [00:52<00:20,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  72%|#######1  | 79/110 [00:52<00:19,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  73%|#######2  | 80/110 [00:53<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  74%|#######3  | 81/110 [00:54<00:19,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  75%|#######4  | 82/110 [00:54<00:18,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  75%|#######5  | 83/110 [00:55<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  76%|#######6  | 84/110 [00:56<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  77%|#######7  | 85/110 [00:56<00:16,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  78%|#######8  | 86/110 [00:57<00:15,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  79%|#######9  | 87/110 [00:58<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  80%|########  | 88/110 [00:58<00:13,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  81%|########  | 89/110 [00:59<00:13,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  82%|########1 | 90/110 [01:00<00:18,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  83%|########2 | 91/110 [01:01<00:15,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  84%|########3 | 92/110 [01:02<00:13,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  85%|########4 | 93/110 [01:02<00:12,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  85%|########5 | 94/110 [01:03<00:11,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  86%|########6 | 95/110 [01:04<00:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  87%|########7 | 96/110 [01:04<00:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  88%|########8 | 97/110 [01:05<00:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  89%|########9 | 98/110 [01:05<00:07,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  90%|######### | 99/110 [01:06<00:06,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  91%|######### | 100/110 [01:07<00:06,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  92%|#########1| 101/110 [01:08<00:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  93%|#########2| 102/110 [01:08<00:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  94%|#########3| 103/110 [01:09<00:04,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  95%|#########4| 104/110 [01:10<00:04,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  95%|#########5| 105/110 [01:10<00:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  96%|#########6| 106/110 [01:11<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  97%|#########7| 107/110 [01:12<00:02,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  98%|#########8| 108/110 [01:13<00:01,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  99%|#########9| 109/110 [01:13<00:00,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||: 100%|##########| 110/110 [01:14<00:00,  1.70it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||: 100%|##########| 110/110 [01:14<00:00,  1.49it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2738 ||:   4%|4         | 1/24 [00:00<00:08,  2.74it/s]
+BLEU: 0.7065, acc: 0.6562, no_result: 0.1094, loss: 1.2065 ||:   8%|8         | 2/24 [00:00<00:07,  2.84it/s]
+BLEU: 0.6836, acc: 0.6250, no_result: 0.1354, loss: 1.3387 ||:  12%|#2        | 3/24 [00:01<00:07,  2.85it/s]
+BLEU: 0.7094, acc: 0.6328, no_result: 0.1406, loss: 1.2425 ||:  17%|#6        | 4/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7354, acc: 0.6625, no_result: 0.1375, loss: 1.1928 ||:  21%|##        | 5/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7353, acc: 0.6615, no_result: 0.1250, loss: 1.1938 ||:  25%|##5       | 6/24 [00:02<00:06,  2.80it/s]
+BLEU: 0.7452, acc: 0.6741, no_result: 0.1161, loss: 1.1729 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7436, acc: 0.6797, no_result: 0.1133, loss: 1.2013 ||:  33%|###3      | 8/24 [00:02<00:05,  2.95it/s]
+BLEU: 0.7337, acc: 0.6875, no_result: 0.1146, loss: 1.1992 ||:  38%|###7      | 9/24 [00:03<00:05,  2.88it/s]
+BLEU: 0.7431, acc: 0.6719, no_result: 0.1031, loss: 1.2195 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7413, acc: 0.6562, no_result: 0.1108, loss: 1.2712 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7357, acc: 0.6641, no_result: 0.1068, loss: 1.2851 ||:  50%|#####     | 12/24 [00:04<00:04,  2.80it/s]
+BLEU: 0.7346, acc: 0.6635, no_result: 0.1106, loss: 1.3105 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.71it/s]
+BLEU: 0.7324, acc: 0.6496, no_result: 0.1094, loss: 1.3591 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.7244, acc: 0.6292, no_result: 0.1292, loss: 1.4191 ||:  62%|######2   | 15/24 [00:05<00:03,  2.42it/s]
+BLEU: 0.7222, acc: 0.6250, no_result: 0.1309, loss: 1.4438 ||:  67%|######6   | 16/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7196, acc: 0.6195, no_result: 0.1379, loss: 1.4312 ||:  71%|#######   | 17/24 [00:06<00:02,  2.44it/s]
+BLEU: 0.7149, acc: 0.6181, no_result: 0.1406, loss: 1.4432 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7173, acc: 0.6250, no_result: 0.1365, loss: 1.4212 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7205, acc: 0.6297, no_result: 0.1328, loss: 1.4078 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7183, acc: 0.6280, no_result: 0.1280, loss: 1.4138 ||:  88%|########7 | 21/24 [00:07<00:01,  2.67it/s]
+BLEU: 0.7241, acc: 0.6236, no_result: 0.1264, loss: 1.4090 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7265, acc: 0.6318, no_result: 0.1223, loss: 1.3846 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7264, acc: 0.6379, no_result: 0.1172, loss: 1.4089 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7264, acc: 0.6379, no_result: 0.1172, loss: 1.4089 ||: 100%|##########| 24/24 [00:08<00:00,  2.75it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0092 ||:   1%|          | 1/110 [00:00<01:19,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:   2%|1         | 2/110 [00:01<01:23,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0124 ||:   3%|2         | 3/110 [00:02<01:26,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:   4%|3         | 4/110 [00:03<01:22,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0126 ||:   5%|4         | 5/110 [00:03<01:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   5%|5         | 6/110 [00:04<01:14,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:   6%|6         | 7/110 [00:05<01:09,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:   7%|7         | 8/110 [00:05<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:   9%|9         | 10/110 [00:06<01:04,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  10%|#         | 11/110 [00:07<01:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  11%|#         | 12/110 [00:08<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  12%|#1        | 13/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  13%|#2        | 14/110 [00:09<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  14%|#3        | 15/110 [00:10<01:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  15%|#4        | 16/110 [00:11<01:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  15%|#5        | 17/110 [00:11<01:00,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  16%|#6        | 18/110 [00:12<00:58,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  17%|#7        | 19/110 [00:12<00:59,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  18%|#8        | 20/110 [00:13<00:58,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  19%|#9        | 21/110 [00:14<00:57,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  20%|##        | 22/110 [00:14<00:54,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  21%|##        | 23/110 [00:15<00:57,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  22%|##1       | 24/110 [00:16<00:57,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  23%|##2       | 25/110 [00:16<00:55,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  24%|##3       | 26/110 [00:17<00:56,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  25%|##4       | 27/110 [00:18<00:54,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  25%|##5       | 28/110 [00:18<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  26%|##6       | 29/110 [00:19<00:52,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  27%|##7       | 30/110 [00:20<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  28%|##8       | 31/110 [00:20<00:53,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  29%|##9       | 32/110 [00:21<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  30%|###       | 33/110 [00:22<00:53,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  31%|###       | 34/110 [00:23<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  32%|###1      | 35/110 [00:23<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  33%|###2      | 36/110 [00:24<00:52,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  34%|###3      | 37/110 [00:25<00:51,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  35%|###4      | 38/110 [00:25<00:49,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  35%|###5      | 39/110 [00:26<00:46,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  36%|###6      | 40/110 [00:26<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  37%|###7      | 41/110 [00:27<00:50,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  38%|###8      | 42/110 [00:28<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  39%|###9      | 43/110 [00:29<00:46,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  40%|####      | 44/110 [00:29<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  41%|####      | 45/110 [00:30<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  42%|####1     | 46/110 [00:31<00:45,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  43%|####2     | 47/110 [00:32<00:46,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  44%|####3     | 48/110 [00:32<00:43,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  45%|####4     | 49/110 [00:33<00:40,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  45%|####5     | 50/110 [00:33<00:39,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  46%|####6     | 51/110 [00:34<00:39,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  47%|####7     | 52/110 [00:35<00:40,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  48%|####8     | 53/110 [00:36<00:38,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  49%|####9     | 54/110 [00:36<00:37,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  50%|#####     | 55/110 [00:37<00:40,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  51%|#####     | 56/110 [00:38<00:39,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  52%|#####1    | 57/110 [00:38<00:37,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  54%|#####3    | 59/110 [00:40<00:35,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  55%|#####4    | 60/110 [00:41<00:34,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  55%|#####5    | 61/110 [00:41<00:33,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  57%|#####7    | 63/110 [00:42<00:30,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  58%|#####8    | 64/110 [00:43<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  59%|#####9    | 65/110 [00:44<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  60%|######    | 66/110 [00:44<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  61%|######    | 67/110 [00:45<00:27,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  62%|######1   | 68/110 [00:46<00:27,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  63%|######2   | 69/110 [00:46<00:26,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  64%|######3   | 70/110 [00:47<00:26,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  65%|######4   | 71/110 [00:48<00:26,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  65%|######5   | 72/110 [00:48<00:25,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  66%|######6   | 73/110 [00:49<00:24,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  67%|######7   | 74/110 [00:50<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  68%|######8   | 75/110 [00:50<00:21,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  69%|######9   | 76/110 [00:51<00:22,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  70%|#######   | 77/110 [00:52<00:21,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  71%|#######   | 78/110 [00:52<00:20,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  73%|#######2  | 80/110 [00:54<00:27,  1.10it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  74%|#######3  | 81/110 [00:55<00:24,  1.20it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  75%|#######4  | 82/110 [00:56<00:21,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  75%|#######5  | 83/110 [00:56<00:20,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  76%|#######6  | 84/110 [00:57<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  77%|#######7  | 85/110 [00:58<00:18,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  78%|#######8  | 86/110 [00:59<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  80%|########  | 88/110 [01:00<00:14,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  81%|########  | 89/110 [01:00<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  82%|########1 | 90/110 [01:01<00:13,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  83%|########2 | 91/110 [01:02<00:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  84%|########3 | 92/110 [01:02<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  85%|########4 | 93/110 [01:03<00:11,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  85%|########5 | 94/110 [01:04<00:10,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  86%|########6 | 95/110 [01:04<00:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  87%|########7 | 96/110 [01:05<00:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  88%|########8 | 97/110 [01:06<00:08,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  89%|########9 | 98/110 [01:06<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  90%|######### | 99/110 [01:07<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  91%|######### | 100/110 [01:08<00:06,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  92%|#########1| 101/110 [01:08<00:05,  1.63it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  93%|#########2| 102/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  94%|#########3| 103/110 [01:09<00:04,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  95%|#########4| 104/110 [01:10<00:03,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  95%|#########5| 105/110 [01:11<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  96%|#########6| 106/110 [01:11<00:02,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  97%|#########7| 107/110 [01:12<00:01,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  98%|#########8| 108/110 [01:13<00:01,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  99%|#########9| 109/110 [01:13<00:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||: 100%|##########| 110/110 [01:14<00:00,  1.87it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||: 100%|##########| 110/110 [01:14<00:00,  1.48it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2800 ||:   4%|4         | 1/24 [00:00<00:08,  2.72it/s]
+BLEU: 0.7246, acc: 0.6406, no_result: 0.1250, loss: 1.2130 ||:   8%|8         | 2/24 [00:00<00:07,  2.81it/s]
+BLEU: 0.6991, acc: 0.6250, no_result: 0.1354, loss: 1.3438 ||:  12%|#2        | 3/24 [00:01<00:07,  2.82it/s]
+BLEU: 0.7207, acc: 0.6406, no_result: 0.1328, loss: 1.2459 ||:  17%|#6        | 4/24 [00:01<00:07,  2.73it/s]
+BLEU: 0.7418, acc: 0.6687, no_result: 0.1313, loss: 1.1967 ||:  21%|##        | 5/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7384, acc: 0.6562, no_result: 0.1250, loss: 1.1972 ||:  25%|##5       | 6/24 [00:02<00:06,  2.79it/s]
+BLEU: 0.7479, acc: 0.6696, no_result: 0.1161, loss: 1.1757 ||:  29%|##9       | 7/24 [00:02<00:05,  2.93it/s]
+BLEU: 0.7460, acc: 0.6797, no_result: 0.1094, loss: 1.2039 ||:  33%|###3      | 8/24 [00:02<00:05,  2.94it/s]
+BLEU: 0.7358, acc: 0.6875, no_result: 0.1111, loss: 1.2001 ||:  38%|###7      | 9/24 [00:03<00:05,  2.87it/s]
+BLEU: 0.7456, acc: 0.6719, no_result: 0.1031, loss: 1.2200 ||:  42%|####1     | 10/24 [00:03<00:04,  2.83it/s]
+BLEU: 0.7435, acc: 0.6562, no_result: 0.1136, loss: 1.2729 ||:  46%|####5     | 11/24 [00:03<00:04,  2.71it/s]
+BLEU: 0.7377, acc: 0.6615, no_result: 0.1146, loss: 1.2869 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7364, acc: 0.6611, no_result: 0.1202, loss: 1.3123 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.69it/s]
+BLEU: 0.7350, acc: 0.6473, no_result: 0.1161, loss: 1.3628 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.60it/s]
+BLEU: 0.7272, acc: 0.6292, no_result: 0.1375, loss: 1.4229 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7245, acc: 0.6250, no_result: 0.1387, loss: 1.4470 ||:  67%|######6   | 16/24 [00:06<00:03,  2.38it/s]
+BLEU: 0.7217, acc: 0.6195, no_result: 0.1452, loss: 1.4344 ||:  71%|#######   | 17/24 [00:06<00:02,  2.45it/s]
+BLEU: 0.7179, acc: 0.6198, no_result: 0.1458, loss: 1.4478 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7199, acc: 0.6266, no_result: 0.1414, loss: 1.4251 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.52it/s]
+BLEU: 0.7232, acc: 0.6312, no_result: 0.1375, loss: 1.4113 ||:  83%|########3 | 20/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7210, acc: 0.6295, no_result: 0.1324, loss: 1.4166 ||:  88%|########7 | 21/24 [00:07<00:01,  2.66it/s]
+BLEU: 0.7266, acc: 0.6250, no_result: 0.1307, loss: 1.4104 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7284, acc: 0.6318, no_result: 0.1264, loss: 1.3861 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7285, acc: 0.6332, no_result: 0.1257, loss: 1.4104 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7285, acc: 0.6332, no_result: 0.1257, loss: 1.4104 ||: 100%|##########| 24/24 [00:08<00:00,  2.74it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0245 ||:   1%|          | 1/110 [00:00<01:20,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0186 ||:   2%|1         | 2/110 [00:01<01:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:   3%|2         | 3/110 [00:01<01:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:   4%|3         | 4/110 [00:02<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0173 ||:   5%|4         | 5/110 [00:03<01:09,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:   5%|5         | 6/110 [00:04<01:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0168 ||:   6%|6         | 7/110 [00:04<01:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:   7%|7         | 8/110 [00:05<01:12,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:   8%|8         | 9/110 [00:06<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:   9%|9         | 10/110 [00:06<01:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  10%|#         | 11/110 [00:07<01:07,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  11%|#         | 12/110 [00:08<01:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  12%|#1        | 13/110 [00:08<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  13%|#2        | 14/110 [00:09<01:06,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  14%|#3        | 15/110 [00:10<01:03,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  15%|#4        | 16/110 [00:11<01:10,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  15%|#5        | 17/110 [00:11<01:06,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  16%|#6        | 18/110 [00:12<01:06,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  17%|#7        | 19/110 [00:13<01:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  18%|#8        | 20/110 [00:13<01:00,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  19%|#9        | 21/110 [00:14<01:00,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  20%|##        | 22/110 [00:15<01:01,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  21%|##        | 23/110 [00:15<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  22%|##1       | 24/110 [00:16<00:56,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  23%|##2       | 25/110 [00:17<00:57,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  24%|##3       | 26/110 [00:17<00:55,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  25%|##4       | 27/110 [00:18<00:53,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  25%|##5       | 28/110 [00:19<00:52,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  26%|##6       | 29/110 [00:19<00:53,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  27%|##7       | 30/110 [00:20<00:50,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  28%|##8       | 31/110 [00:20<00:49,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  29%|##9       | 32/110 [00:21<00:48,  1.62it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  30%|###       | 33/110 [00:22<00:49,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  31%|###       | 34/110 [00:22<00:47,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  32%|###1      | 35/110 [00:23<00:46,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  33%|###2      | 36/110 [00:24<00:49,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  34%|###3      | 37/110 [00:24<00:48,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  35%|###4      | 38/110 [00:25<00:46,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  35%|###5      | 39/110 [00:26<00:45,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  36%|###6      | 40/110 [00:26<00:44,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  37%|###7      | 41/110 [00:27<00:44,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  38%|###8      | 42/110 [00:28<00:43,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  39%|###9      | 43/110 [00:28<00:42,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  40%|####      | 44/110 [00:29<00:42,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  41%|####      | 45/110 [00:30<00:42,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  42%|####1     | 46/110 [00:30<00:41,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  43%|####2     | 47/110 [00:31<00:40,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  44%|####3     | 48/110 [00:31<00:38,  1.60it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  45%|####4     | 49/110 [00:32<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  45%|####5     | 50/110 [00:33<00:39,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  46%|####6     | 51/110 [00:34<00:41,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  47%|####7     | 52/110 [00:34<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  48%|####8     | 53/110 [00:35<00:39,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  49%|####9     | 54/110 [00:36<00:42,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  50%|#####     | 55/110 [00:36<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  51%|#####     | 56/110 [00:37<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  52%|#####1    | 57/110 [00:38<00:38,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  53%|#####2    | 58/110 [00:39<00:35,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  54%|#####3    | 59/110 [00:39<00:34,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  55%|#####4    | 60/110 [00:40<00:33,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  56%|#####6    | 62/110 [00:41<00:32,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  57%|#####7    | 63/110 [00:42<00:32,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  58%|#####8    | 64/110 [00:43<00:30,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  59%|#####9    | 65/110 [00:43<00:29,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  60%|######    | 66/110 [00:44<00:29,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  61%|######    | 67/110 [00:45<00:30,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  62%|######1   | 68/110 [00:45<00:28,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  63%|######2   | 69/110 [00:46<00:28,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  64%|######3   | 70/110 [00:48<00:37,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  65%|######4   | 71/110 [00:48<00:33,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  65%|######5   | 72/110 [00:49<00:30,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  66%|######6   | 73/110 [00:50<00:29,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  67%|######7   | 74/110 [00:50<00:27,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  68%|######8   | 75/110 [00:51<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  69%|######9   | 76/110 [00:52<00:23,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  70%|#######   | 77/110 [00:52<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  71%|#######   | 78/110 [00:53<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  75%|#######5  | 83/110 [00:56<00:18,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  77%|#######7  | 85/110 [00:58<00:18,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  78%|#######8  | 86/110 [00:59<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  79%|#######9  | 87/110 [00:59<00:16,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  80%|########  | 88/110 [01:00<00:15,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  81%|########  | 89/110 [01:01<00:14,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  82%|########1 | 90/110 [01:01<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  83%|########2 | 91/110 [01:02<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  84%|########3 | 92/110 [01:03<00:11,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  85%|########4 | 93/110 [01:03<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  85%|########5 | 94/110 [01:04<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  86%|########6 | 95/110 [01:05<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  87%|########7 | 96/110 [01:05<00:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  88%|########8 | 97/110 [01:06<00:08,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  89%|########9 | 98/110 [01:07<00:07,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  90%|######### | 99/110 [01:07<00:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  91%|######### | 100/110 [01:08<00:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  92%|#########1| 101/110 [01:09<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  93%|#########2| 102/110 [01:09<00:05,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  94%|#########3| 103/110 [01:10<00:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  95%|#########4| 104/110 [01:11<00:04,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  95%|#########5| 105/110 [01:12<00:03,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  96%|#########6| 106/110 [01:12<00:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  97%|#########7| 107/110 [01:13<00:02,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  98%|#########8| 108/110 [01:14<00:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  99%|#########9| 109/110 [01:14<00:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||: 100%|##########| 110/110 [01:15<00:00,  1.75it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||: 100%|##########| 110/110 [01:15<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.3035 ||:   4%|4         | 1/24 [00:00<00:08,  2.69it/s]
+BLEU: 0.7241, acc: 0.6562, no_result: 0.1250, loss: 1.2245 ||:   8%|8         | 2/24 [00:00<00:07,  2.80it/s]
+BLEU: 0.6988, acc: 0.6354, no_result: 0.1354, loss: 1.3499 ||:  12%|#2        | 3/24 [00:01<00:07,  2.81it/s]
+BLEU: 0.7204, acc: 0.6484, no_result: 0.1328, loss: 1.2482 ||:  17%|#6        | 4/24 [00:01<00:07,  2.72it/s]
+BLEU: 0.7417, acc: 0.6750, no_result: 0.1313, loss: 1.1992 ||:  21%|##        | 5/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7385, acc: 0.6615, no_result: 0.1250, loss: 1.1958 ||:  25%|##5       | 6/24 [00:02<00:06,  2.77it/s]
+BLEU: 0.7479, acc: 0.6741, no_result: 0.1161, loss: 1.1757 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7420, acc: 0.6797, no_result: 0.1133, loss: 1.2033 ||:  33%|###3      | 8/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7322, acc: 0.6806, no_result: 0.1181, loss: 1.1992 ||:  38%|###7      | 9/24 [00:03<00:05,  2.85it/s]
+BLEU: 0.7418, acc: 0.6656, no_result: 0.1094, loss: 1.2192 ||:  42%|####1     | 10/24 [00:03<00:05,  2.80it/s]
+BLEU: 0.7401, acc: 0.6534, no_result: 0.1136, loss: 1.2724 ||:  46%|####5     | 11/24 [00:03<00:04,  2.69it/s]
+BLEU: 0.7346, acc: 0.6615, no_result: 0.1120, loss: 1.2868 ||:  50%|#####     | 12/24 [00:04<00:04,  2.77it/s]
+BLEU: 0.7336, acc: 0.6611, no_result: 0.1178, loss: 1.3125 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.68it/s]
+BLEU: 0.7324, acc: 0.6518, no_result: 0.1116, loss: 1.3616 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.60it/s]
+BLEU: 0.7248, acc: 0.6333, no_result: 0.1333, loss: 1.4223 ||:  62%|######2   | 15/24 [00:05<00:03,  2.40it/s]
+BLEU: 0.7222, acc: 0.6270, no_result: 0.1348, loss: 1.4465 ||:  67%|######6   | 16/24 [00:06<00:03,  2.36it/s]
+BLEU: 0.7196, acc: 0.6213, no_result: 0.1415, loss: 1.4341 ||:  71%|#######   | 17/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7173, acc: 0.6181, no_result: 0.1476, loss: 1.4462 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.41it/s]
+BLEU: 0.7193, acc: 0.6250, no_result: 0.1431, loss: 1.4238 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.50it/s]
+BLEU: 0.7227, acc: 0.6297, no_result: 0.1391, loss: 1.4104 ||:  83%|########3 | 20/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7205, acc: 0.6280, no_result: 0.1339, loss: 1.4160 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7258, acc: 0.6236, no_result: 0.1321, loss: 1.4100 ||:  92%|#########1| 22/24 [00:08<00:00,  2.63it/s]
+BLEU: 0.7284, acc: 0.6304, no_result: 0.1277, loss: 1.3859 ||:  96%|#########5| 23/24 [00:08<00:00,  2.70it/s]
+BLEU: 0.7286, acc: 0.6366, no_result: 0.1224, loss: 1.4113 ||: 100%|##########| 24/24 [00:08<00:00,  3.35it/s]
+BLEU: 0.7286, acc: 0.6366, no_result: 0.1224, loss: 1.4113 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0192 ||:   1%|          | 1/110 [00:00<01:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:   2%|1         | 2/110 [00:01<01:12,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:   3%|2         | 3/110 [00:02<01:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:   4%|3         | 4/110 [00:02<01:13,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:   5%|4         | 5/110 [00:03<01:13,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0119 ||:   5%|5         | 6/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:   6%|6         | 7/110 [00:04<01:08,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0126 ||:   7%|7         | 8/110 [00:05<01:13,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:   8%|8         | 9/110 [00:06<01:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:   9%|9         | 10/110 [00:06<01:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  10%|#         | 11/110 [00:07<01:11,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  11%|#         | 12/110 [00:08<01:08,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  12%|#1        | 13/110 [00:09<01:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  13%|#2        | 14/110 [00:09<01:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  14%|#3        | 15/110 [00:10<01:04,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  15%|#4        | 16/110 [00:11<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  15%|#5        | 17/110 [00:11<01:05,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  17%|#7        | 19/110 [00:13<01:01,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  18%|#8        | 20/110 [00:13<00:58,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  19%|#9        | 21/110 [00:14<00:59,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  20%|##        | 22/110 [00:15<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  21%|##        | 23/110 [00:15<01:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  22%|##1       | 24/110 [00:16<00:58,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  23%|##2       | 25/110 [00:17<00:57,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  24%|##3       | 26/110 [00:18<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  25%|##4       | 27/110 [00:18<00:57,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  25%|##5       | 28/110 [00:19<00:54,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  26%|##6       | 29/110 [00:19<00:53,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  27%|##7       | 30/110 [00:20<00:57,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  28%|##8       | 31/110 [00:21<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  29%|##9       | 32/110 [00:22<00:54,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  30%|###       | 33/110 [00:22<00:55,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  31%|###       | 34/110 [00:23<00:53,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  32%|###1      | 35/110 [00:24<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  33%|###2      | 36/110 [00:24<00:50,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  34%|###3      | 37/110 [00:25<00:50,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  35%|###4      | 38/110 [00:26<00:50,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  35%|###5      | 39/110 [00:27<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  36%|###6      | 40/110 [00:27<00:50,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  37%|###7      | 41/110 [00:28<00:50,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  38%|###8      | 42/110 [00:29<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  39%|###9      | 43/110 [00:29<00:48,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  40%|####      | 44/110 [00:30<00:46,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  41%|####      | 45/110 [00:31<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  42%|####1     | 46/110 [00:32<00:45,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  43%|####2     | 47/110 [00:32<00:43,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  44%|####3     | 48/110 [00:33<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  45%|####4     | 49/110 [00:33<00:39,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  45%|####5     | 50/110 [00:34<00:39,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  46%|####6     | 51/110 [00:35<00:38,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  47%|####7     | 52/110 [00:35<00:37,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  48%|####8     | 53/110 [00:36<00:37,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  49%|####9     | 54/110 [00:37<00:36,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  50%|#####     | 55/110 [00:37<00:36,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  51%|#####     | 56/110 [00:38<00:38,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  52%|#####1    | 57/110 [00:39<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  53%|#####2    | 58/110 [00:40<00:36,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  54%|#####3    | 59/110 [00:40<00:36,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  55%|#####4    | 60/110 [00:42<00:45,  1.09it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  55%|#####5    | 61/110 [00:42<00:41,  1.19it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  56%|#####6    | 62/110 [00:43<00:36,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  57%|#####7    | 63/110 [00:44<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  59%|#####9    | 65/110 [00:45<00:31,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  60%|######    | 66/110 [00:46<00:29,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  61%|######    | 67/110 [00:46<00:29,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  62%|######1   | 68/110 [00:47<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  63%|######2   | 69/110 [00:48<00:27,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  64%|######3   | 70/110 [00:48<00:29,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  65%|######4   | 71/110 [00:49<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  65%|######5   | 72/110 [00:50<00:25,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  66%|######6   | 73/110 [00:50<00:25,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  67%|######7   | 74/110 [00:51<00:25,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  68%|######8   | 75/110 [00:52<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  69%|######9   | 76/110 [00:52<00:22,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  70%|#######   | 77/110 [00:53<00:21,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  71%|#######   | 78/110 [00:54<00:20,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  72%|#######1  | 79/110 [00:54<00:19,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  73%|#######2  | 80/110 [00:55<00:20,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  74%|#######3  | 81/110 [00:56<00:19,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  76%|#######6  | 84/110 [00:58<00:17,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  78%|#######8  | 86/110 [00:59<00:15,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  79%|#######9  | 87/110 [01:00<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  80%|########  | 88/110 [01:00<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  81%|########  | 89/110 [01:01<00:13,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  82%|########1 | 90/110 [01:02<00:13,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  83%|########2 | 91/110 [01:02<00:12,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  84%|########3 | 92/110 [01:03<00:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  85%|########4 | 93/110 [01:04<00:11,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  85%|########5 | 94/110 [01:04<00:10,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  86%|########6 | 95/110 [01:05<00:09,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  87%|########7 | 96/110 [01:06<00:08,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  88%|########8 | 97/110 [01:06<00:08,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  89%|########9 | 98/110 [01:07<00:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  90%|######### | 99/110 [01:08<00:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  91%|######### | 100/110 [01:08<00:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  92%|#########1| 101/110 [01:09<00:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  93%|#########2| 102/110 [01:10<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  94%|#########3| 103/110 [01:10<00:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  95%|#########4| 104/110 [01:11<00:03,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  95%|#########5| 105/110 [01:12<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  96%|#########6| 106/110 [01:12<00:02,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  97%|#########7| 107/110 [01:13<00:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  98%|#########8| 108/110 [01:14<00:01,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  99%|#########9| 109/110 [01:14<00:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||: 100%|##########| 110/110 [01:15<00:00,  1.68it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2894 ||:   4%|4         | 1/24 [00:00<00:08,  2.68it/s]
+BLEU: 0.7140, acc: 0.6562, no_result: 0.1250, loss: 1.2217 ||:   8%|8         | 2/24 [00:00<00:07,  2.78it/s]
+BLEU: 0.6931, acc: 0.6250, no_result: 0.1458, loss: 1.3547 ||:  12%|#2        | 3/24 [00:01<00:07,  2.80it/s]
+BLEU: 0.7163, acc: 0.6406, no_result: 0.1406, loss: 1.2553 ||:  17%|#6        | 4/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7382, acc: 0.6687, no_result: 0.1375, loss: 1.2019 ||:  21%|##        | 5/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7345, acc: 0.6510, no_result: 0.1302, loss: 1.1978 ||:  25%|##5       | 6/24 [00:02<00:06,  2.77it/s]
+BLEU: 0.7445, acc: 0.6652, no_result: 0.1205, loss: 1.1767 ||:  29%|##9       | 7/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7445, acc: 0.6719, no_result: 0.1172, loss: 1.2058 ||:  33%|###3      | 8/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7345, acc: 0.6771, no_result: 0.1181, loss: 1.2017 ||:  38%|###7      | 9/24 [00:03<00:05,  2.84it/s]
+BLEU: 0.7445, acc: 0.6656, no_result: 0.1094, loss: 1.2211 ||:  42%|####1     | 10/24 [00:03<00:05,  2.79it/s]
+BLEU: 0.7425, acc: 0.6534, no_result: 0.1136, loss: 1.2738 ||:  46%|####5     | 11/24 [00:03<00:04,  2.67it/s]
+BLEU: 0.7367, acc: 0.6615, no_result: 0.1094, loss: 1.2886 ||:  50%|#####     | 12/24 [00:04<00:04,  2.76it/s]
+BLEU: 0.7355, acc: 0.6635, no_result: 0.1130, loss: 1.3141 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7346, acc: 0.6518, no_result: 0.1071, loss: 1.3616 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.56it/s]
+BLEU: 0.7268, acc: 0.6312, no_result: 0.1292, loss: 1.4238 ||:  62%|######2   | 15/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7241, acc: 0.6270, no_result: 0.1309, loss: 1.4472 ||:  67%|######6   | 16/24 [00:06<00:03,  2.34it/s]
+BLEU: 0.7220, acc: 0.6213, no_result: 0.1379, loss: 1.4343 ||:  71%|#######   | 17/24 [00:06<00:02,  2.41it/s]
+BLEU: 0.7176, acc: 0.6181, no_result: 0.1424, loss: 1.4466 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7196, acc: 0.6250, no_result: 0.1382, loss: 1.4244 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.49it/s]
+BLEU: 0.7229, acc: 0.6297, no_result: 0.1344, loss: 1.4106 ||:  83%|########3 | 20/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7207, acc: 0.6280, no_result: 0.1295, loss: 1.4168 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7258, acc: 0.6236, no_result: 0.1278, loss: 1.4111 ||:  92%|#########1| 22/24 [00:08<00:00,  2.62it/s]
+BLEU: 0.7278, acc: 0.6304, no_result: 0.1236, loss: 1.3867 ||:  96%|#########5| 23/24 [00:08<00:00,  2.70it/s]
+BLEU: 0.7277, acc: 0.6366, no_result: 0.1185, loss: 1.4107 ||: 100%|##########| 24/24 [00:08<00:00,  3.35it/s]
+BLEU: 0.7277, acc: 0.6366, no_result: 0.1185, loss: 1.4107 ||: 100%|##########| 24/24 [00:08<00:00,  2.72it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0182 ||:   1%|          | 1/110 [00:00<01:19,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:   2%|1         | 2/110 [00:01<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:   3%|2         | 3/110 [00:02<01:11,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:   4%|3         | 4/110 [00:02<01:07,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:   5%|4         | 5/110 [00:03<01:09,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:   5%|5         | 6/110 [00:03<01:05,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:   6%|6         | 7/110 [00:04<01:07,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:   7%|7         | 8/110 [00:05<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0131 ||:   8%|8         | 9/110 [00:05<01:06,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   9%|9         | 10/110 [00:06<01:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  10%|#         | 11/110 [00:07<01:06,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0131 ||:  11%|#         | 12/110 [00:07<01:02,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0129 ||:  12%|#1        | 13/110 [00:08<01:01,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0126 ||:  13%|#2        | 14/110 [00:09<01:01,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0126 ||:  14%|#3        | 15/110 [00:10<01:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0124 ||:  15%|#4        | 16/110 [00:10<01:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0122 ||:  15%|#5        | 17/110 [00:11<01:07,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0121 ||:  16%|#6        | 18/110 [00:12<01:06,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0118 ||:  17%|#7        | 19/110 [00:13<01:06,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0117 ||:  18%|#8        | 20/110 [00:13<01:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0118 ||:  19%|#9        | 21/110 [00:14<01:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0119 ||:  20%|##        | 22/110 [00:15<00:59,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0119 ||:  21%|##        | 23/110 [00:15<00:58,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0118 ||:  22%|##1       | 24/110 [00:16<00:59,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:  23%|##2       | 25/110 [00:17<00:58,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:  24%|##3       | 26/110 [00:17<00:58,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0122 ||:  25%|##4       | 27/110 [00:18<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0120 ||:  25%|##5       | 28/110 [00:19<00:58,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0130 ||:  26%|##6       | 29/110 [00:19<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  27%|##7       | 30/110 [00:20<00:56,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:  28%|##8       | 31/110 [00:21<00:56,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  29%|##9       | 32/110 [00:21<00:52,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  30%|###       | 33/110 [00:22<00:50,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  31%|###       | 34/110 [00:23<00:49,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  32%|###1      | 35/110 [00:23<00:50,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  33%|###2      | 36/110 [00:24<00:48,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  34%|###3      | 37/110 [00:25<00:47,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  35%|###4      | 38/110 [00:25<00:46,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  35%|###5      | 39/110 [00:26<00:48,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  36%|###6      | 40/110 [00:27<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  37%|###7      | 41/110 [00:27<00:45,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  38%|###8      | 42/110 [00:28<00:48,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  39%|###9      | 43/110 [00:29<00:47,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  40%|####      | 44/110 [00:30<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  41%|####      | 45/110 [00:30<00:43,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  42%|####1     | 46/110 [00:31<00:43,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  43%|####2     | 47/110 [00:32<00:42,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  44%|####3     | 48/110 [00:32<00:39,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  45%|####4     | 49/110 [00:33<00:38,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  45%|####5     | 50/110 [00:34<00:54,  1.11it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  46%|####6     | 51/110 [00:35<00:48,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  47%|####7     | 52/110 [00:36<00:44,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  48%|####8     | 53/110 [00:36<00:43,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  49%|####9     | 54/110 [00:37<00:40,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  50%|#####     | 55/110 [00:38<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  51%|#####     | 56/110 [00:38<00:36,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  52%|#####1    | 57/110 [00:39<00:35,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  54%|#####3    | 59/110 [00:40<00:37,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  55%|#####4    | 60/110 [00:41<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  55%|#####5    | 61/110 [00:42<00:35,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  56%|#####6    | 62/110 [00:43<00:35,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  57%|#####7    | 63/110 [00:43<00:34,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  58%|#####8    | 64/110 [00:44<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  59%|#####9    | 65/110 [00:45<00:30,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  60%|######    | 66/110 [00:45<00:28,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  61%|######    | 67/110 [00:46<00:28,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  62%|######1   | 68/110 [00:47<00:28,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  63%|######2   | 69/110 [00:47<00:26,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  64%|######3   | 70/110 [00:48<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  65%|######4   | 71/110 [00:48<00:24,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  65%|######5   | 72/110 [00:49<00:24,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  66%|######6   | 73/110 [00:50<00:23,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  67%|######7   | 74/110 [00:50<00:23,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  68%|######8   | 75/110 [00:51<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  69%|######9   | 76/110 [00:52<00:21,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  70%|#######   | 77/110 [00:52<00:22,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  71%|#######   | 78/110 [00:53<00:21,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  75%|#######4  | 82/110 [00:56<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  76%|#######6  | 84/110 [00:57<00:16,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  77%|#######7  | 85/110 [00:58<00:16,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  78%|#######8  | 86/110 [00:58<00:15,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  79%|#######9  | 87/110 [00:59<00:14,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  80%|########  | 88/110 [01:00<00:14,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  81%|########  | 89/110 [01:00<00:13,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  82%|########1 | 90/110 [01:01<00:12,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  83%|########2 | 91/110 [01:01<00:12,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  84%|########3 | 92/110 [01:02<00:12,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  85%|########4 | 93/110 [01:03<00:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  85%|########5 | 94/110 [01:04<00:11,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  86%|########6 | 95/110 [01:04<00:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  87%|########7 | 96/110 [01:05<00:09,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  88%|########8 | 97/110 [01:06<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  89%|########9 | 98/110 [01:07<00:08,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  90%|######### | 99/110 [01:07<00:08,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  91%|######### | 100/110 [01:08<00:07,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  92%|#########1| 101/110 [01:09<00:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  93%|#########2| 102/110 [01:10<00:05,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  94%|#########3| 103/110 [01:10<00:05,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  95%|#########4| 104/110 [01:11<00:04,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  95%|#########5| 105/110 [01:11<00:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  96%|#########6| 106/110 [01:12<00:02,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  97%|#########7| 107/110 [01:13<00:01,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  98%|#########8| 108/110 [01:13<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  99%|#########9| 109/110 [01:14<00:00,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||: 100%|##########| 110/110 [01:14<00:00,  1.82it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||: 100%|##########| 110/110 [01:14<00:00,  1.47it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.5938, no_result: 0.1250, loss: 1.2712 ||:   4%|4         | 1/24 [00:00<00:08,  2.60it/s]
+BLEU: 0.7204, acc: 0.6406, no_result: 0.1250, loss: 1.2089 ||:   8%|8         | 2/24 [00:00<00:08,  2.70it/s]
+BLEU: 0.6963, acc: 0.6146, no_result: 0.1562, loss: 1.3478 ||:  12%|#2        | 3/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.7187, acc: 0.6328, no_result: 0.1484, loss: 1.2502 ||:  17%|#6        | 4/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7403, acc: 0.6625, no_result: 0.1437, loss: 1.1989 ||:  21%|##        | 5/24 [00:01<00:07,  2.63it/s]
+BLEU: 0.7364, acc: 0.6458, no_result: 0.1354, loss: 1.1965 ||:  25%|##5       | 6/24 [00:02<00:06,  2.74it/s]
+BLEU: 0.7461, acc: 0.6607, no_result: 0.1250, loss: 1.1758 ||:  29%|##9       | 7/24 [00:02<00:05,  2.88it/s]
+BLEU: 0.7444, acc: 0.6680, no_result: 0.1211, loss: 1.2059 ||:  33%|###3      | 8/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7343, acc: 0.6736, no_result: 0.1250, loss: 1.2044 ||:  38%|###7      | 9/24 [00:03<00:05,  2.83it/s]
+BLEU: 0.7450, acc: 0.6625, no_result: 0.1156, loss: 1.2222 ||:  42%|####1     | 10/24 [00:03<00:05,  2.79it/s]
+BLEU: 0.7429, acc: 0.6477, no_result: 0.1222, loss: 1.2741 ||:  46%|####5     | 11/24 [00:04<00:04,  2.68it/s]
+BLEU: 0.7377, acc: 0.6562, no_result: 0.1172, loss: 1.2877 ||:  50%|#####     | 12/24 [00:04<00:04,  2.77it/s]
+BLEU: 0.7364, acc: 0.6587, no_result: 0.1226, loss: 1.3132 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7354, acc: 0.6473, no_result: 0.1161, loss: 1.3612 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.7272, acc: 0.6292, no_result: 0.1396, loss: 1.4233 ||:  62%|######2   | 15/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.7248, acc: 0.6270, no_result: 0.1406, loss: 1.4483 ||:  67%|######6   | 16/24 [00:06<00:03,  2.34it/s]
+BLEU: 0.7220, acc: 0.6213, no_result: 0.1471, loss: 1.4355 ||:  71%|#######   | 17/24 [00:06<00:02,  2.41it/s]
+BLEU: 0.7176, acc: 0.6198, no_result: 0.1493, loss: 1.4485 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7196, acc: 0.6266, no_result: 0.1447, loss: 1.4262 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.49it/s]
+BLEU: 0.7220, acc: 0.6312, no_result: 0.1406, loss: 1.4118 ||:  83%|########3 | 20/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7198, acc: 0.6295, no_result: 0.1354, loss: 1.4184 ||:  88%|########7 | 21/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7246, acc: 0.6250, no_result: 0.1335, loss: 1.4125 ||:  92%|#########1| 22/24 [00:08<00:00,  2.62it/s]
+BLEU: 0.7267, acc: 0.6332, no_result: 0.1277, loss: 1.3882 ||:  96%|#########5| 23/24 [00:08<00:00,  2.70it/s]
+BLEU: 0.7266, acc: 0.6392, no_result: 0.1224, loss: 1.4118 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7266, acc: 0.6392, no_result: 0.1224, loss: 1.4118 ||: 100%|##########| 24/24 [00:08<00:00,  2.71it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0275 ||:   1%|          | 1/110 [00:00<01:35,  1.14it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:   2%|1         | 2/110 [00:01<01:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0198 ||:   3%|2         | 3/110 [00:02<01:16,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0166 ||:   4%|3         | 4/110 [00:02<01:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0171 ||:   5%|4         | 5/110 [00:03<01:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:   5%|5         | 6/110 [00:04<01:10,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:   6%|6         | 7/110 [00:04<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:   7%|7         | 8/110 [00:05<01:11,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:   8%|8         | 9/110 [00:06<01:11,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:   9%|9         | 10/110 [00:07<01:15,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0167 ||:  10%|#         | 11/110 [00:07<01:12,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0161 ||:  11%|#         | 12/110 [00:08<01:12,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0163 ||:  12%|#1        | 13/110 [00:09<01:08,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0164 ||:  13%|#2        | 14/110 [00:10<01:14,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  14%|#3        | 15/110 [00:10<01:10,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0180 ||:  15%|#4        | 16/110 [00:11<01:07,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0179 ||:  15%|#5        | 17/110 [00:12<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0176 ||:  16%|#6        | 18/110 [00:12<01:06,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0174 ||:  17%|#7        | 19/110 [00:13<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0170 ||:  18%|#8        | 20/110 [00:14<01:06,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0169 ||:  19%|#9        | 21/110 [00:15<01:05,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0165 ||:  20%|##        | 22/110 [00:15<01:06,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0162 ||:  21%|##        | 23/110 [00:16<01:01,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  22%|##1       | 24/110 [00:17<01:03,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0160 ||:  23%|##2       | 25/110 [00:18<01:02,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0159 ||:  24%|##3       | 26/110 [00:18<00:59,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  25%|##4       | 27/110 [00:19<00:58,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:  25%|##5       | 28/110 [00:20<00:58,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  26%|##6       | 29/110 [00:20<00:55,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  27%|##7       | 30/110 [00:21<00:59,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  28%|##8       | 31/110 [00:22<00:59,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  29%|##9       | 32/110 [00:23<00:57,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  30%|###       | 33/110 [00:23<00:55,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  31%|###       | 34/110 [00:24<00:54,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  32%|###1      | 35/110 [00:25<00:55,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  33%|###2      | 36/110 [00:26<00:54,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  34%|###3      | 37/110 [00:26<00:54,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  35%|###4      | 38/110 [00:27<00:53,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  35%|###5      | 39/110 [00:28<00:52,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  36%|###6      | 40/110 [00:29<01:10,  1.00s/it]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  37%|###7      | 41/110 [00:30<01:05,  1.06it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  38%|###8      | 42/110 [00:31<00:59,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  39%|###9      | 43/110 [00:32<00:54,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  40%|####      | 44/110 [00:32<00:51,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  41%|####      | 45/110 [00:33<00:49,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  42%|####1     | 46/110 [00:34<00:47,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  43%|####2     | 47/110 [00:34<00:45,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  44%|####3     | 48/110 [00:35<00:45,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  45%|####4     | 49/110 [00:36<00:49,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  45%|####5     | 50/110 [00:37<00:46,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  46%|####6     | 51/110 [00:38<00:44,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  47%|####7     | 52/110 [00:38<00:44,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  48%|####8     | 53/110 [00:39<00:43,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  49%|####9     | 54/110 [00:40<00:42,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  50%|#####     | 55/110 [00:41<00:40,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  51%|#####     | 56/110 [00:41<00:39,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  52%|#####1    | 57/110 [00:42<00:39,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  53%|#####2    | 58/110 [00:43<00:39,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  54%|#####3    | 59/110 [00:44<00:37,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  55%|#####4    | 60/110 [00:44<00:35,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  55%|#####5    | 61/110 [00:45<00:34,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  56%|#####6    | 62/110 [00:45<00:32,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  57%|#####7    | 63/110 [00:46<00:33,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  58%|#####8    | 64/110 [00:47<00:31,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  59%|#####9    | 65/110 [00:47<00:29,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  60%|######    | 66/110 [00:48<00:30,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  61%|######    | 67/110 [00:49<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  62%|######1   | 68/110 [00:50<00:28,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  63%|######2   | 69/110 [00:50<00:27,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  64%|######3   | 70/110 [00:51<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  65%|######4   | 71/110 [00:51<00:25,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  65%|######5   | 72/110 [00:52<00:24,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  66%|######6   | 73/110 [00:53<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  67%|######7   | 74/110 [00:54<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  68%|######8   | 75/110 [00:54<00:23,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  69%|######9   | 76/110 [00:55<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  70%|#######   | 77/110 [00:56<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  71%|#######   | 78/110 [00:56<00:21,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  72%|#######1  | 79/110 [00:57<00:20,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  73%|#######2  | 80/110 [00:58<00:21,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  74%|#######3  | 81/110 [00:58<00:19,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  75%|#######4  | 82/110 [00:59<00:18,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  75%|#######5  | 83/110 [01:00<00:17,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  76%|#######6  | 84/110 [01:00<00:18,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  77%|#######7  | 85/110 [01:01<00:17,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  78%|#######8  | 86/110 [01:02<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  79%|#######9  | 87/110 [01:02<00:15,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  80%|########  | 88/110 [01:03<00:14,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  81%|########  | 89/110 [01:04<00:13,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  82%|########1 | 90/110 [01:04<00:12,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  83%|########2 | 91/110 [01:05<00:12,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  84%|########3 | 92/110 [01:06<00:11,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  85%|########4 | 93/110 [01:06<00:10,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  85%|########5 | 94/110 [01:07<00:10,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  86%|########6 | 95/110 [01:08<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  87%|########7 | 96/110 [01:08<00:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  88%|########8 | 97/110 [01:09<00:08,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  89%|########9 | 98/110 [01:10<00:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  90%|######### | 99/110 [01:10<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  91%|######### | 100/110 [01:11<00:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  92%|#########1| 101/110 [01:12<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  93%|#########2| 102/110 [01:12<00:05,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  94%|#########3| 103/110 [01:13<00:04,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  95%|#########4| 104/110 [01:14<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  95%|#########5| 105/110 [01:14<00:03,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  96%|#########6| 106/110 [01:15<00:02,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  97%|#########7| 107/110 [01:16<00:01,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  98%|#########8| 108/110 [01:16<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  99%|#########9| 109/110 [01:17<00:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||: 100%|##########| 110/110 [01:17<00:00,  1.80it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||: 100%|##########| 110/110 [01:17<00:00,  1.42it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.2976 ||:   4%|4         | 1/24 [00:00<00:08,  2.61it/s]
+BLEU: 0.6980, acc: 0.6406, no_result: 0.1250, loss: 1.2205 ||:   8%|8         | 2/24 [00:00<00:08,  2.67it/s]
+BLEU: 0.6836, acc: 0.6250, no_result: 0.1354, loss: 1.3561 ||:  12%|#2        | 3/24 [00:01<00:07,  2.74it/s]
+BLEU: 0.7094, acc: 0.6406, no_result: 0.1328, loss: 1.2572 ||:  17%|#6        | 4/24 [00:01<00:07,  2.68it/s]
+BLEU: 0.7323, acc: 0.6687, no_result: 0.1313, loss: 1.2067 ||:  21%|##        | 5/24 [00:01<00:07,  2.65it/s]
+BLEU: 0.7320, acc: 0.6510, no_result: 0.1250, loss: 1.2028 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7424, acc: 0.6652, no_result: 0.1161, loss: 1.1818 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7427, acc: 0.6719, no_result: 0.1133, loss: 1.2108 ||:  33%|###3      | 8/24 [00:02<00:05,  2.92it/s]
+BLEU: 0.7328, acc: 0.6771, no_result: 0.1146, loss: 1.2048 ||:  38%|###7      | 9/24 [00:03<00:05,  2.85it/s]
+BLEU: 0.7431, acc: 0.6656, no_result: 0.1062, loss: 1.2233 ||:  42%|####1     | 10/24 [00:03<00:04,  2.81it/s]
+BLEU: 0.7413, acc: 0.6534, no_result: 0.1108, loss: 1.2763 ||:  46%|####5     | 11/24 [00:03<00:04,  2.70it/s]
+BLEU: 0.7361, acc: 0.6615, no_result: 0.1068, loss: 1.2907 ||:  50%|#####     | 12/24 [00:04<00:04,  2.79it/s]
+BLEU: 0.7345, acc: 0.6611, no_result: 0.1130, loss: 1.3172 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.68it/s]
+BLEU: 0.7332, acc: 0.6496, no_result: 0.1071, loss: 1.3683 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.60it/s]
+BLEU: 0.7268, acc: 0.6333, no_result: 0.1271, loss: 1.4299 ||:  62%|######2   | 15/24 [00:05<00:03,  2.41it/s]
+BLEU: 0.7241, acc: 0.6289, no_result: 0.1289, loss: 1.4538 ||:  67%|######6   | 16/24 [00:06<00:03,  2.37it/s]
+BLEU: 0.7216, acc: 0.6232, no_result: 0.1379, loss: 1.4413 ||:  71%|#######   | 17/24 [00:06<00:02,  2.43it/s]
+BLEU: 0.7192, acc: 0.6215, no_result: 0.1406, loss: 1.4551 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.41it/s]
+BLEU: 0.7221, acc: 0.6299, no_result: 0.1365, loss: 1.4325 ||:  79%|#######9  | 19/24 [00:07<00:01,  2.50it/s]
+BLEU: 0.7244, acc: 0.6344, no_result: 0.1328, loss: 1.4190 ||:  83%|########3 | 20/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7221, acc: 0.6324, no_result: 0.1280, loss: 1.4241 ||:  88%|########7 | 21/24 [00:07<00:01,  2.65it/s]
+BLEU: 0.7274, acc: 0.6278, no_result: 0.1264, loss: 1.4173 ||:  92%|#########1| 22/24 [00:08<00:00,  2.64it/s]
+BLEU: 0.7300, acc: 0.6345, no_result: 0.1223, loss: 1.3921 ||:  96%|#########5| 23/24 [00:08<00:00,  2.72it/s]
+BLEU: 0.7298, acc: 0.6405, no_result: 0.1172, loss: 1.4165 ||: 100%|##########| 24/24 [00:08<00:00,  3.38it/s]
+BLEU: 0.7298, acc: 0.6405, no_result: 0.1172, loss: 1.4165 ||: 100%|##########| 24/24 [00:08<00:00,  2.73it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0062 ||:   1%|          | 1/110 [00:00<01:32,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0100 ||:   2%|1         | 2/110 [00:01<01:28,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0096 ||:   3%|2         | 3/110 [00:02<01:22,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0089 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0103 ||:   5%|4         | 5/110 [00:03<01:17,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0101 ||:   5%|5         | 6/110 [00:04<01:11,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0110 ||:   6%|6         | 7/110 [00:05<01:10,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0107 ||:   7%|7         | 8/110 [00:05<01:06,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0108 ||:   8%|8         | 9/110 [00:06<01:08,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0104 ||:   9%|9         | 10/110 [00:07<01:07,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0108 ||:  10%|#         | 11/110 [00:07<01:05,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0122 ||:  11%|#         | 12/110 [00:08<01:04,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0131 ||:  12%|#1        | 13/110 [00:09<01:06,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  13%|#2        | 14/110 [00:09<01:07,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  14%|#3        | 15/110 [00:10<01:05,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  15%|#4        | 16/110 [00:11<01:02,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  15%|#5        | 17/110 [00:11<01:01,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  16%|#6        | 18/110 [00:12<01:02,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  17%|#7        | 19/110 [00:13<01:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  18%|#8        | 20/110 [00:13<01:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  19%|#9        | 21/110 [00:14<01:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  20%|##        | 22/110 [00:15<01:02,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  21%|##        | 23/110 [00:15<01:00,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  22%|##1       | 24/110 [00:16<01:00,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  23%|##2       | 25/110 [00:17<01:01,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  24%|##3       | 26/110 [00:18<01:01,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  25%|##4       | 27/110 [00:18<00:58,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  25%|##5       | 28/110 [00:19<01:00,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  26%|##6       | 29/110 [00:20<01:03,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  27%|##7       | 30/110 [00:21<01:17,  1.03it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  28%|##8       | 31/110 [00:22<01:10,  1.12it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  29%|##9       | 32/110 [00:23<01:05,  1.18it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  30%|###       | 33/110 [00:24<01:02,  1.22it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  31%|###       | 34/110 [00:24<01:01,  1.23it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  32%|###1      | 35/110 [00:25<00:57,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  33%|###2      | 36/110 [00:26<00:55,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  34%|###3      | 37/110 [00:27<00:54,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  35%|###4      | 38/110 [00:27<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  35%|###5      | 39/110 [00:28<00:50,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  36%|###6      | 40/110 [00:29<00:51,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0144 ||:  37%|###7      | 41/110 [00:29<00:49,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  38%|###8      | 42/110 [00:30<00:50,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  39%|###9      | 43/110 [00:31<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  40%|####      | 44/110 [00:32<00:47,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  41%|####      | 45/110 [00:32<00:46,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  42%|####1     | 46/110 [00:33<00:46,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  43%|####2     | 47/110 [00:34<00:45,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  44%|####3     | 48/110 [00:34<00:44,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  45%|####4     | 49/110 [00:35<00:43,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  45%|####5     | 50/110 [00:36<00:41,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  46%|####6     | 51/110 [00:37<00:41,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  47%|####7     | 52/110 [00:37<00:42,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  48%|####8     | 53/110 [00:38<00:40,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  49%|####9     | 54/110 [00:39<00:39,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  50%|#####     | 55/110 [00:39<00:39,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  51%|#####     | 56/110 [00:40<00:38,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  52%|#####1    | 57/110 [00:41<00:37,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  53%|#####2    | 58/110 [00:42<00:38,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  54%|#####3    | 59/110 [00:42<00:38,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  55%|#####4    | 60/110 [00:43<00:37,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  55%|#####5    | 61/110 [00:44<00:36,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  56%|#####6    | 62/110 [00:45<00:36,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  57%|#####7    | 63/110 [00:45<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  58%|#####8    | 64/110 [00:46<00:33,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  59%|#####9    | 65/110 [00:47<00:33,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  60%|######    | 66/110 [00:48<00:31,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  61%|######    | 67/110 [00:48<00:30,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  62%|######1   | 68/110 [00:49<00:30,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  63%|######2   | 69/110 [00:50<00:29,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  64%|######3   | 70/110 [00:50<00:28,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  65%|######4   | 71/110 [00:51<00:28,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  65%|######5   | 72/110 [00:52<00:27,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  66%|######6   | 73/110 [00:53<00:25,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  67%|######7   | 74/110 [00:53<00:24,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  68%|######8   | 75/110 [00:54<00:24,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  69%|######9   | 76/110 [00:55<00:24,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  70%|#######   | 77/110 [00:55<00:22,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  71%|#######   | 78/110 [00:56<00:22,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  72%|#######1  | 79/110 [00:57<00:21,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  73%|#######2  | 80/110 [00:57<00:21,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  74%|#######3  | 81/110 [00:58<00:20,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  75%|#######4  | 82/110 [00:59<00:19,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  75%|#######5  | 83/110 [01:00<00:19,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  76%|#######6  | 84/110 [01:00<00:19,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  77%|#######7  | 85/110 [01:01<00:18,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  78%|#######8  | 86/110 [01:02<00:17,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  79%|#######9  | 87/110 [01:02<00:16,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  80%|########  | 88/110 [01:03<00:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  81%|########  | 89/110 [01:04<00:16,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  82%|########1 | 90/110 [01:05<00:15,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  83%|########2 | 91/110 [01:06<00:14,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  84%|########3 | 92/110 [01:07<00:14,  1.25it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  85%|########4 | 93/110 [01:07<00:13,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  85%|########5 | 94/110 [01:08<00:11,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  86%|########6 | 95/110 [01:09<00:11,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  87%|########7 | 96/110 [01:09<00:10,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  88%|########8 | 97/110 [01:10<00:09,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  89%|########9 | 98/110 [01:11<00:09,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  90%|######### | 99/110 [01:12<00:08,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  91%|######### | 100/110 [01:12<00:07,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0146 ||:  92%|#########1| 101/110 [01:13<00:07,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  93%|#########2| 102/110 [01:14<00:06,  1.28it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  94%|#########3| 103/110 [01:15<00:05,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:  95%|#########4| 104/110 [01:15<00:04,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  95%|#########5| 105/110 [01:16<00:03,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  96%|#########6| 106/110 [01:17<00:03,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  97%|#########7| 107/110 [01:18<00:02,  1.30it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  98%|#########8| 108/110 [01:19<00:01,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  99%|#########9| 109/110 [01:19<00:00,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||: 100%|##########| 110/110 [01:20<00:00,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||: 100%|##########| 110/110 [01:20<00:00,  1.37it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7204, acc: 0.5938, no_result: 0.1250, loss: 1.3098 ||:   4%|4         | 1/24 [00:00<00:09,  2.33it/s]
+BLEU: 0.7059, acc: 0.6406, no_result: 0.1250, loss: 1.2282 ||:   8%|8         | 2/24 [00:00<00:09,  2.41it/s]
+BLEU: 0.6898, acc: 0.6146, no_result: 0.1458, loss: 1.3605 ||:  12%|#2        | 3/24 [00:01<00:08,  2.40it/s]
+BLEU: 0.7139, acc: 0.6328, no_result: 0.1406, loss: 1.2634 ||:  17%|#6        | 4/24 [00:01<00:08,  2.35it/s]
+BLEU: 0.7360, acc: 0.6625, no_result: 0.1375, loss: 1.2129 ||:  21%|##        | 5/24 [00:02<00:08,  2.32it/s]
+BLEU: 0.7327, acc: 0.6458, no_result: 0.1250, loss: 1.2089 ||:  25%|##5       | 6/24 [00:02<00:07,  2.40it/s]
+BLEU: 0.7430, acc: 0.6607, no_result: 0.1161, loss: 1.1877 ||:  29%|##9       | 7/24 [00:02<00:06,  2.52it/s]
+BLEU: 0.7432, acc: 0.6680, no_result: 0.1133, loss: 1.2163 ||:  33%|###3      | 8/24 [00:03<00:06,  2.52it/s]
+BLEU: 0.7334, acc: 0.6771, no_result: 0.1111, loss: 1.2122 ||:  38%|###7      | 9/24 [00:03<00:06,  2.46it/s]
+BLEU: 0.7428, acc: 0.6656, no_result: 0.1000, loss: 1.2317 ||:  42%|####1     | 10/24 [00:04<00:05,  2.42it/s]
+BLEU: 0.7409, acc: 0.6591, no_result: 0.1023, loss: 1.2851 ||:  46%|####5     | 11/24 [00:04<00:05,  2.31it/s]
+BLEU: 0.7358, acc: 0.6667, no_result: 0.0990, loss: 1.2993 ||:  50%|#####     | 12/24 [00:04<00:05,  2.38it/s]
+BLEU: 0.7347, acc: 0.6659, no_result: 0.1058, loss: 1.3252 ||:  54%|#####4    | 13/24 [00:05<00:04,  2.30it/s]
+BLEU: 0.7333, acc: 0.6540, no_result: 0.1004, loss: 1.3750 ||:  58%|#####8    | 14/24 [00:05<00:04,  2.22it/s]
+BLEU: 0.7256, acc: 0.6354, no_result: 0.1229, loss: 1.4359 ||:  62%|######2   | 15/24 [00:06<00:04,  2.06it/s]
+BLEU: 0.7234, acc: 0.6289, no_result: 0.1270, loss: 1.4598 ||:  67%|######6   | 16/24 [00:07<00:03,  2.02it/s]
+BLEU: 0.7207, acc: 0.6232, no_result: 0.1342, loss: 1.4469 ||:  71%|#######   | 17/24 [00:07<00:03,  2.08it/s]
+BLEU: 0.7177, acc: 0.6198, no_result: 0.1389, loss: 1.4596 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.06it/s]
+BLEU: 0.7207, acc: 0.6283, no_result: 0.1349, loss: 1.4371 ||:  79%|#######9  | 19/24 [00:08<00:02,  2.15it/s]
+BLEU: 0.7237, acc: 0.6328, no_result: 0.1313, loss: 1.4234 ||:  83%|########3 | 20/24 [00:08<00:01,  2.27it/s]
+BLEU: 0.7214, acc: 0.6310, no_result: 0.1265, loss: 1.4292 ||:  88%|########7 | 21/24 [00:09<00:01,  2.29it/s]
+BLEU: 0.7278, acc: 0.6278, no_result: 0.1250, loss: 1.4231 ||:  92%|#########1| 22/24 [00:09<00:00,  2.27it/s]
+BLEU: 0.7306, acc: 0.6345, no_result: 0.1209, loss: 1.3980 ||:  96%|#########5| 23/24 [00:10<00:00,  2.34it/s]
+BLEU: 0.7302, acc: 0.6405, no_result: 0.1159, loss: 1.4221 ||: 100%|##########| 24/24 [00:10<00:00,  2.88it/s]
+BLEU: 0.7302, acc: 0.6405, no_result: 0.1159, loss: 1.4221 ||: 100%|##########| 24/24 [00:10<00:00,  2.35it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0119 ||:   1%|          | 1/110 [00:00<01:26,  1.26it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0158 ||:   2%|1         | 2/110 [00:01<01:22,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:   3%|2         | 3/110 [00:02<01:15,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0145 ||:   4%|3         | 4/110 [00:02<01:12,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:   5%|4         | 5/110 [00:03<01:12,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:   5%|5         | 6/110 [00:04<01:11,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0133 ||:   6%|6         | 7/110 [00:04<01:13,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:   7%|7         | 8/110 [00:05<01:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:   8%|8         | 9/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:   9%|9         | 10/110 [00:06<01:06,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  10%|#         | 11/110 [00:07<01:08,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  11%|#         | 12/110 [00:08<01:06,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  12%|#1        | 13/110 [00:08<01:02,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0131 ||:  13%|#2        | 14/110 [00:09<00:59,  1.61it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0133 ||:  14%|#3        | 15/110 [00:10<00:59,  1.59it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0130 ||:  15%|#4        | 16/110 [00:10<00:59,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  15%|#5        | 17/110 [00:11<00:59,  1.56it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:  16%|#6        | 18/110 [00:12<01:00,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  17%|#7        | 19/110 [00:12<01:00,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  18%|#8        | 20/110 [00:14<01:18,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  19%|#9        | 21/110 [00:14<01:17,  1.15it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  20%|##        | 22/110 [00:15<01:10,  1.24it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  21%|##        | 23/110 [00:16<01:06,  1.31it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  22%|##1       | 24/110 [00:17<01:04,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  23%|##2       | 25/110 [00:17<01:00,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  24%|##3       | 26/110 [00:18<01:01,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  25%|##4       | 27/110 [00:19<00:59,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  25%|##5       | 28/110 [00:19<00:56,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  26%|##6       | 29/110 [00:20<00:56,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  27%|##7       | 30/110 [00:21<00:54,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  28%|##8       | 31/110 [00:21<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  29%|##9       | 32/110 [00:22<00:56,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  30%|###       | 33/110 [00:23<00:54,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  31%|###       | 34/110 [00:23<00:52,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  32%|###1      | 35/110 [00:24<00:51,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:  33%|###2      | 36/110 [00:25<00:55,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  34%|###3      | 37/110 [00:26<00:52,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  35%|###4      | 38/110 [00:26<00:50,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  35%|###5      | 39/110 [00:27<00:49,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  36%|###6      | 40/110 [00:28<00:46,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  37%|###7      | 41/110 [00:28<00:45,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  38%|###8      | 42/110 [00:29<00:45,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  39%|###9      | 43/110 [00:30<00:45,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  40%|####      | 44/110 [00:30<00:46,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  41%|####      | 45/110 [00:31<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  42%|####1     | 46/110 [00:32<00:42,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  43%|####2     | 47/110 [00:32<00:41,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  44%|####3     | 48/110 [00:33<00:40,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  45%|####4     | 49/110 [00:34<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  45%|####5     | 50/110 [00:34<00:39,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  46%|####6     | 51/110 [00:35<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  47%|####7     | 52/110 [00:36<00:40,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  48%|####8     | 53/110 [00:36<00:38,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0142 ||:  49%|####9     | 54/110 [00:37<00:36,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0143 ||:  50%|#####     | 55/110 [00:38<00:35,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  51%|#####     | 56/110 [00:38<00:34,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  52%|#####1    | 57/110 [00:39<00:33,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0147 ||:  53%|#####2    | 58/110 [00:39<00:32,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  55%|#####4    | 60/110 [00:41<00:32,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  55%|#####5    | 61/110 [00:41<00:32,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  56%|#####6    | 62/110 [00:42<00:35,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  57%|#####7    | 63/110 [00:43<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0148 ||:  58%|#####8    | 64/110 [00:44<00:34,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0149 ||:  59%|#####9    | 65/110 [00:45<00:32,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  60%|######    | 66/110 [00:45<00:30,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  61%|######    | 67/110 [00:46<00:29,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  62%|######1   | 68/110 [00:47<00:30,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  63%|######2   | 69/110 [00:47<00:29,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  64%|######3   | 70/110 [00:48<00:27,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  65%|######4   | 71/110 [00:49<00:27,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  65%|######5   | 72/110 [00:49<00:25,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  66%|######6   | 73/110 [00:50<00:24,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  67%|######7   | 74/110 [00:51<00:24,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  68%|######8   | 75/110 [00:51<00:23,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  69%|######9   | 76/110 [00:52<00:21,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  70%|#######   | 77/110 [00:53<00:20,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  71%|#######   | 78/110 [00:53<00:21,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  72%|#######1  | 79/110 [00:54<00:21,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  73%|#######2  | 80/110 [00:55<00:20,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0157 ||:  74%|#######3  | 81/110 [00:55<00:20,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  75%|#######4  | 82/110 [00:56<00:19,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  75%|#######5  | 83/110 [00:57<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0156 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  77%|#######7  | 85/110 [00:58<00:17,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  78%|#######8  | 86/110 [00:59<00:16,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  79%|#######9  | 87/110 [00:59<00:15,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  80%|########  | 88/110 [01:00<00:14,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  81%|########  | 89/110 [01:01<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  82%|########1 | 90/110 [01:01<00:13,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0151 ||:  83%|########2 | 91/110 [01:02<00:12,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  84%|########3 | 92/110 [01:03<00:12,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  85%|########4 | 93/110 [01:04<00:12,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0150 ||:  85%|########5 | 94/110 [01:04<00:11,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  86%|########6 | 95/110 [01:05<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0152 ||:  87%|########7 | 96/110 [01:06<00:10,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  88%|########8 | 97/110 [01:06<00:09,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  89%|########9 | 98/110 [01:07<00:08,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  90%|######### | 99/110 [01:08<00:07,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  91%|######### | 100/110 [01:08<00:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  92%|#########1| 101/110 [01:09<00:06,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  93%|#########2| 102/110 [01:10<00:05,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  94%|#########3| 103/110 [01:10<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  95%|#########4| 104/110 [01:11<00:04,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  95%|#########5| 105/110 [01:12<00:03,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0155 ||:  96%|#########6| 106/110 [01:13<00:02,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0154 ||:  97%|#########7| 107/110 [01:13<00:02,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  98%|#########8| 108/110 [01:14<00:01,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||:  99%|#########9| 109/110 [01:15<00:00,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||: 100%|##########| 110/110 [01:15<00:00,  1.78it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0153 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.3019 ||:   4%|4         | 1/24 [00:00<00:08,  2.66it/s]
+BLEU: 0.7081, acc: 0.6406, no_result: 0.1250, loss: 1.2282 ||:   8%|8         | 2/24 [00:00<00:07,  2.76it/s]
+BLEU: 0.6856, acc: 0.6146, no_result: 0.1458, loss: 1.3622 ||:  12%|#2        | 3/24 [00:01<00:07,  2.79it/s]
+BLEU: 0.7109, acc: 0.6328, no_result: 0.1406, loss: 1.2662 ||:  17%|#6        | 4/24 [00:01<00:07,  2.71it/s]
+BLEU: 0.7335, acc: 0.6625, no_result: 0.1375, loss: 1.2119 ||:  21%|##        | 5/24 [00:01<00:07,  2.66it/s]
+BLEU: 0.7341, acc: 0.6458, no_result: 0.1302, loss: 1.2072 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7441, acc: 0.6607, no_result: 0.1205, loss: 1.1852 ||:  29%|##9       | 7/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7377, acc: 0.6680, no_result: 0.1172, loss: 1.2141 ||:  33%|###3      | 8/24 [00:02<00:05,  2.90it/s]
+BLEU: 0.7285, acc: 0.6736, no_result: 0.1181, loss: 1.2099 ||:  38%|###7      | 9/24 [00:03<00:05,  2.84it/s]
+BLEU: 0.7381, acc: 0.6625, no_result: 0.1062, loss: 1.2286 ||:  42%|####1     | 10/24 [00:03<00:05,  2.72it/s]
+BLEU: 0.7368, acc: 0.6534, no_result: 0.1080, loss: 1.2815 ||:  46%|####5     | 11/24 [00:04<00:04,  2.63it/s]
+BLEU: 0.7319, acc: 0.6615, no_result: 0.1042, loss: 1.2957 ||:  50%|#####     | 12/24 [00:04<00:04,  2.74it/s]
+BLEU: 0.7312, acc: 0.6611, no_result: 0.1106, loss: 1.3218 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.65it/s]
+BLEU: 0.7297, acc: 0.6496, no_result: 0.1049, loss: 1.3712 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.58it/s]
+BLEU: 0.7235, acc: 0.6333, no_result: 0.1271, loss: 1.4323 ||:  62%|######2   | 15/24 [00:05<00:03,  2.38it/s]
+BLEU: 0.7192, acc: 0.6270, no_result: 0.1289, loss: 1.4564 ||:  67%|######6   | 16/24 [00:06<00:03,  2.35it/s]
+BLEU: 0.7174, acc: 0.6213, no_result: 0.1360, loss: 1.4431 ||:  71%|#######   | 17/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7153, acc: 0.6181, no_result: 0.1406, loss: 1.4576 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.38it/s]
+BLEU: 0.7184, acc: 0.6266, no_result: 0.1365, loss: 1.4343 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.46it/s]
+BLEU: 0.7214, acc: 0.6312, no_result: 0.1328, loss: 1.4202 ||:  83%|########3 | 20/24 [00:07<00:01,  2.61it/s]
+BLEU: 0.7193, acc: 0.6295, no_result: 0.1280, loss: 1.4256 ||:  88%|########7 | 21/24 [00:08<00:01,  2.62it/s]
+BLEU: 0.7260, acc: 0.6293, no_result: 0.1250, loss: 1.4194 ||:  92%|#########1| 22/24 [00:08<00:00,  2.61it/s]
+BLEU: 0.7292, acc: 0.6372, no_result: 0.1196, loss: 1.3942 ||:  96%|#########5| 23/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.7290, acc: 0.6385, no_result: 0.1192, loss: 1.4180 ||: 100%|##########| 24/24 [00:08<00:00,  3.33it/s]
+BLEU: 0.7290, acc: 0.6385, no_result: 0.1192, loss: 1.4180 ||: 100%|##########| 24/24 [00:08<00:00,  2.70it/s]
+
+  0%|          | 0/110 [00:00<?, ?it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0049 ||:   1%|          | 1/110 [00:00<01:14,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0086 ||:   2%|1         | 2/110 [00:01<01:10,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0114 ||:   3%|2         | 3/110 [00:01<01:09,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0099 ||:   4%|3         | 4/110 [00:02<01:15,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0102 ||:   5%|4         | 5/110 [00:03<01:09,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0100 ||:   5%|5         | 6/110 [00:04<01:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0099 ||:   6%|6         | 7/110 [00:04<01:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0104 ||:   7%|7         | 8/110 [00:05<01:09,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0107 ||:   8%|8         | 9/110 [00:06<01:07,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0108 ||:   9%|9         | 10/110 [00:07<01:26,  1.16it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0112 ||:  10%|#         | 11/110 [00:08<01:18,  1.27it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0122 ||:  11%|#         | 12/110 [00:08<01:14,  1.32it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0120 ||:  12%|#1        | 13/110 [00:09<01:09,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0117 ||:  13%|#2        | 14/110 [00:09<01:06,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0117 ||:  14%|#3        | 15/110 [00:10<01:06,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0116 ||:  15%|#4        | 16/110 [00:11<01:06,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0116 ||:  15%|#5        | 17/110 [00:12<01:07,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0116 ||:  16%|#6        | 18/110 [00:12<01:05,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0113 ||:  17%|#7        | 19/110 [00:13<01:06,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0114 ||:  18%|#8        | 20/110 [00:14<01:03,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0115 ||:  19%|#9        | 21/110 [00:14<01:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0113 ||:  20%|##        | 22/110 [00:15<00:58,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0118 ||:  21%|##        | 23/110 [00:16<00:59,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:  22%|##1       | 24/110 [00:16<00:58,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0123 ||:  23%|##2       | 25/110 [00:17<00:56,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0122 ||:  24%|##3       | 26/110 [00:18<00:54,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0126 ||:  25%|##4       | 27/110 [00:18<00:53,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0125 ||:  25%|##5       | 28/110 [00:19<00:53,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0124 ||:  26%|##6       | 29/110 [00:20<00:53,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0126 ||:  27%|##7       | 30/110 [00:20<00:52,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0130 ||:  28%|##8       | 31/110 [00:21<00:51,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0130 ||:  29%|##9       | 32/110 [00:22<00:49,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0128 ||:  30%|###       | 33/110 [00:22<00:50,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0130 ||:  31%|###       | 34/110 [00:23<00:52,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  32%|###1      | 35/110 [00:24<00:50,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:  33%|###2      | 36/110 [00:24<00:49,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0132 ||:  34%|###3      | 37/110 [00:25<00:51,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0131 ||:  35%|###4      | 38/110 [00:26<00:48,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  35%|###5      | 39/110 [00:26<00:47,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  36%|###6      | 40/110 [00:27<00:46,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  37%|###7      | 41/110 [00:28<00:46,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  38%|###8      | 42/110 [00:28<00:47,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  39%|###9      | 43/110 [00:29<00:49,  1.36it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  40%|####      | 44/110 [00:30<00:46,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  41%|####      | 45/110 [00:31<00:45,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  42%|####1     | 46/110 [00:31<00:43,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  43%|####2     | 47/110 [00:32<00:43,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  44%|####3     | 48/110 [00:33<00:41,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  45%|####4     | 49/110 [00:33<00:41,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  45%|####5     | 50/110 [00:34<00:40,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  46%|####6     | 51/110 [00:35<00:38,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  47%|####7     | 52/110 [00:35<00:41,  1.39it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  48%|####8     | 53/110 [00:36<00:39,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  49%|####9     | 54/110 [00:37<00:37,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  50%|#####     | 55/110 [00:37<00:38,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  51%|#####     | 56/110 [00:38<00:37,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  52%|#####1    | 57/110 [00:39<00:37,  1.42it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  53%|#####2    | 58/110 [00:39<00:34,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  54%|#####3    | 59/110 [00:40<00:33,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  55%|#####4    | 60/110 [00:41<00:32,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  55%|#####5    | 61/110 [00:41<00:31,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  56%|#####6    | 62/110 [00:42<00:31,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  57%|#####7    | 63/110 [00:43<00:30,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  58%|#####8    | 64/110 [00:43<00:29,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  59%|#####9    | 65/110 [00:44<00:28,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  60%|######    | 66/110 [00:45<00:28,  1.57it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  61%|######    | 67/110 [00:45<00:31,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  62%|######1   | 68/110 [00:46<00:29,  1.40it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  63%|######2   | 69/110 [00:47<00:27,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  64%|######3   | 70/110 [00:47<00:27,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  65%|######4   | 71/110 [00:48<00:25,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  65%|######5   | 72/110 [00:49<00:24,  1.58it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  66%|######6   | 73/110 [00:49<00:24,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  67%|######7   | 74/110 [00:50<00:23,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  68%|######8   | 75/110 [00:51<00:22,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  69%|######9   | 76/110 [00:51<00:22,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  70%|#######   | 77/110 [00:52<00:22,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  71%|#######   | 78/110 [00:53<00:21,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0135 ||:  72%|#######1  | 79/110 [00:53<00:20,  1.49it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:  73%|#######2  | 80/110 [00:54<00:20,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0134 ||:  74%|#######3  | 81/110 [00:55<00:19,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0133 ||:  75%|#######4  | 82/110 [00:55<00:18,  1.53it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0136 ||:  75%|#######5  | 83/110 [00:56<00:17,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  76%|#######6  | 84/110 [00:57<00:17,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0137 ||:  77%|#######7  | 85/110 [00:58<00:18,  1.35it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  78%|#######8  | 86/110 [00:58<00:18,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  79%|#######9  | 87/110 [00:59<00:17,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  80%|########  | 88/110 [01:00<00:16,  1.37it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  81%|########  | 89/110 [01:00<00:14,  1.41it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||:  82%|########1 | 90/110 [01:01<00:14,  1.34it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  83%|########2 | 91/110 [01:02<00:14,  1.33it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  84%|########3 | 92/110 [01:03<00:13,  1.38it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  85%|########4 | 93/110 [01:03<00:11,  1.44it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  85%|########5 | 94/110 [01:04<00:10,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  86%|########6 | 95/110 [01:05<00:10,  1.46it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0141 ||:  87%|########7 | 96/110 [01:05<00:09,  1.43it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  88%|########8 | 97/110 [01:06<00:08,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  89%|########9 | 98/110 [01:07<00:07,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  90%|######### | 99/110 [01:07<00:07,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  91%|######### | 100/110 [01:08<00:06,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  92%|#########1| 101/110 [01:09<00:05,  1.55it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  93%|#########2| 102/110 [01:09<00:05,  1.48it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  94%|#########3| 103/110 [01:10<00:04,  1.54it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  95%|#########4| 104/110 [01:11<00:03,  1.52it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  95%|#########5| 105/110 [01:11<00:03,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  96%|#########6| 106/110 [01:12<00:02,  1.50it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0140 ||:  97%|#########7| 107/110 [01:13<00:01,  1.51it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  98%|#########8| 108/110 [01:13<00:01,  1.45it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0139 ||:  99%|#########9| 109/110 [01:14<00:00,  1.47it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||: 100%|##########| 110/110 [01:15<00:00,  1.29it/s]
+acc: 0.0000, no_result: 0.0000, loss: 0.0138 ||: 100%|##########| 110/110 [01:15<00:00,  1.46it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.7253, acc: 0.6250, no_result: 0.1250, loss: 1.3101 ||:   4%|4         | 1/24 [00:00<00:08,  2.68it/s]
+BLEU: 0.7146, acc: 0.6562, no_result: 0.1250, loss: 1.2302 ||:   8%|8         | 2/24 [00:00<00:07,  2.79it/s]
+BLEU: 0.6899, acc: 0.6250, no_result: 0.1562, loss: 1.3630 ||:  12%|#2        | 3/24 [00:01<00:07,  2.80it/s]
+BLEU: 0.7140, acc: 0.6406, no_result: 0.1484, loss: 1.2676 ||:  17%|#6        | 4/24 [00:01<00:07,  2.70it/s]
+BLEU: 0.7366, acc: 0.6687, no_result: 0.1437, loss: 1.2130 ||:  21%|##        | 5/24 [00:01<00:07,  2.67it/s]
+BLEU: 0.7355, acc: 0.6510, no_result: 0.1354, loss: 1.2056 ||:  25%|##5       | 6/24 [00:02<00:06,  2.76it/s]
+BLEU: 0.7454, acc: 0.6652, no_result: 0.1250, loss: 1.1850 ||:  29%|##9       | 7/24 [00:02<00:05,  2.89it/s]
+BLEU: 0.7437, acc: 0.6719, no_result: 0.1211, loss: 1.2135 ||:  33%|###3      | 8/24 [00:02<00:05,  2.91it/s]
+BLEU: 0.7338, acc: 0.6771, no_result: 0.1215, loss: 1.2099 ||:  38%|###7      | 9/24 [00:03<00:05,  2.82it/s]
+BLEU: 0.7426, acc: 0.6656, no_result: 0.1094, loss: 1.2287 ||:  42%|####1     | 10/24 [00:03<00:05,  2.78it/s]
+BLEU: 0.7396, acc: 0.6562, no_result: 0.1136, loss: 1.2801 ||:  46%|####5     | 11/24 [00:03<00:04,  2.67it/s]
+BLEU: 0.7341, acc: 0.6641, no_result: 0.1094, loss: 1.2941 ||:  50%|#####     | 12/24 [00:04<00:04,  2.76it/s]
+BLEU: 0.7327, acc: 0.6635, no_result: 0.1154, loss: 1.3198 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.67it/s]
+BLEU: 0.7311, acc: 0.6518, no_result: 0.1094, loss: 1.3684 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.59it/s]
+BLEU: 0.7232, acc: 0.6333, no_result: 0.1313, loss: 1.4300 ||:  62%|######2   | 15/24 [00:05<00:03,  2.39it/s]
+BLEU: 0.7189, acc: 0.6270, no_result: 0.1328, loss: 1.4539 ||:  67%|######6   | 16/24 [00:06<00:03,  2.35it/s]
+BLEU: 0.7171, acc: 0.6213, no_result: 0.1415, loss: 1.4408 ||:  71%|#######   | 17/24 [00:06<00:02,  2.42it/s]
+BLEU: 0.7150, acc: 0.6181, no_result: 0.1458, loss: 1.4550 ||:  75%|#######5  | 18/24 [00:06<00:02,  2.40it/s]
+BLEU: 0.7171, acc: 0.6250, no_result: 0.1414, loss: 1.4324 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.49it/s]
+BLEU: 0.7193, acc: 0.6297, no_result: 0.1375, loss: 1.4188 ||:  83%|########3 | 20/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7172, acc: 0.6280, no_result: 0.1324, loss: 1.4243 ||:  88%|########7 | 21/24 [00:07<00:01,  2.64it/s]
+BLEU: 0.7241, acc: 0.6278, no_result: 0.1293, loss: 1.4178 ||:  92%|#########1| 22/24 [00:08<00:00,  2.62it/s]
+BLEU: 0.7268, acc: 0.6359, no_result: 0.1236, loss: 1.3926 ||:  96%|#########5| 23/24 [00:08<00:00,  2.71it/s]
+BLEU: 0.7267, acc: 0.6418, no_result: 0.1185, loss: 1.4167 ||: 100%|##########| 24/24 [00:08<00:00,  3.36it/s]
+BLEU: 0.7267, acc: 0.6418, no_result: 0.1185, loss: 1.4167 ||: 100%|##########| 24/24 [00:08<00:00,  2.72it/s]
+
+  0%|          | 0/24 [00:00<?, ?it/s]
+BLEU: 0.67, acc: 0.75, no_result: 0.03, loss: 0.98 ||:   4%|4         | 1/24 [00:00<00:08,  2.67it/s]
+BLEU: 0.68, acc: 0.72, no_result: 0.03, loss: 1.03 ||:   8%|8         | 2/24 [00:00<00:07,  2.92it/s]
+BLEU: 0.66, acc: 0.73, no_result: 0.04, loss: 0.99 ||:  12%|#2        | 3/24 [00:01<00:07,  2.93it/s]
+BLEU: 0.68, acc: 0.70, no_result: 0.09, loss: 1.05 ||:  17%|#6        | 4/24 [00:01<00:07,  2.69it/s]
+BLEU: 0.68, acc: 0.66, no_result: 0.12, loss: 1.12 ||:  21%|##        | 5/24 [00:01<00:07,  2.46it/s]
+BLEU: 0.70, acc: 0.64, no_result: 0.11, loss: 1.08 ||:  25%|##5       | 6/24 [00:02<00:07,  2.30it/s]
+BLEU: 0.71, acc: 0.66, no_result: 0.10, loss: 1.02 ||:  29%|##9       | 7/24 [00:02<00:06,  2.51it/s]
+BLEU: 0.72, acc: 0.68, no_result: 0.09, loss: 0.97 ||:  33%|###3      | 8/24 [00:03<00:05,  2.68it/s]
+BLEU: 0.72, acc: 0.68, no_result: 0.09, loss: 0.98 ||:  38%|###7      | 9/24 [00:03<00:05,  2.73it/s]
+BLEU: 0.72, acc: 0.69, no_result: 0.09, loss: 1.00 ||:  42%|####1     | 10/24 [00:03<00:05,  2.75it/s]
+BLEU: 0.72, acc: 0.68, no_result: 0.10, loss: 1.05 ||:  46%|####5     | 11/24 [00:04<00:04,  2.62it/s]
+BLEU: 0.72, acc: 0.67, no_result: 0.10, loss: 1.09 ||:  50%|#####     | 12/24 [00:04<00:04,  2.49it/s]
+BLEU: 0.71, acc: 0.67, no_result: 0.10, loss: 1.10 ||:  54%|#####4    | 13/24 [00:04<00:04,  2.61it/s]
+BLEU: 0.71, acc: 0.66, no_result: 0.10, loss: 1.11 ||:  58%|#####8    | 14/24 [00:05<00:03,  2.61it/s]
+BLEU: 0.70, acc: 0.64, no_result: 0.10, loss: 1.17 ||:  62%|######2   | 15/24 [00:05<00:03,  2.52it/s]
+BLEU: 0.70, acc: 0.64, no_result: 0.11, loss: 1.20 ||:  67%|######6   | 16/24 [00:06<00:03,  2.43it/s]
+BLEU: 0.70, acc: 0.63, no_result: 0.12, loss: 1.23 ||:  71%|#######   | 17/24 [00:06<00:02,  2.38it/s]
+BLEU: 0.70, acc: 0.63, no_result: 0.12, loss: 1.25 ||:  75%|#######5  | 18/24 [00:07<00:02,  2.41it/s]
+BLEU: 0.70, acc: 0.63, no_result: 0.12, loss: 1.25 ||:  79%|#######9  | 19/24 [00:07<00:02,  2.47it/s]
+BLEU: 0.70, acc: 0.63, no_result: 0.12, loss: 1.25 ||:  83%|########3 | 20/24 [00:07<00:01,  2.57it/s]
+BLEU: 0.70, acc: 0.63, no_result: 0.12, loss: 1.26 ||:  88%|########7 | 21/24 [00:08<00:01,  2.61it/s]
+BLEU: 0.70, acc: 0.63, no_result: 0.12, loss: 1.26 ||:  92%|#########1| 22/24 [00:08<00:00,  2.57it/s]
+BLEU: 0.71, acc: 0.64, no_result: 0.12, loss: 1.23 ||:  96%|#########5| 23/24 [00:08<00:00,  2.69it/s]
+BLEU: 0.70, acc: 0.64, no_result: 0.12, loss: 1.23 ||: 100%|##########| 24/24 [00:09<00:00,  3.10it/s]
+BLEU: 0.70, acc: 0.64, no_result: 0.12, loss: 1.23 ||: 100%|##########| 24/24 [00:09<00:00,  2.63it/s]
+
diff --git a/stdout.log b/stdout.log
new file mode 100644
index 0000000000000000000000000000000000000000..445a86bbc603d90adc8870be0bf70bf11f4cb3f0
--- /dev/null
+++ b/stdout.log
@@ -0,0 +1,3752 @@
+2025-03-24 13:21:27,530 - INFO - allennlp.common.params - random_seed = 13370
+2025-03-24 13:21:27,531 - INFO - allennlp.common.params - numpy_seed = 1337
+2025-03-24 13:21:27,531 - INFO - allennlp.common.params - pytorch_seed = 133
+2025-03-24 13:21:27,531 - INFO - allennlp.common.checks - Pytorch version: 1.5.1
+2025-03-24 13:21:27,584 - INFO - allennlp.common.params - evaluate_on_test = True
+2025-03-24 13:21:27,584 - INFO - allennlp.common.params - validation_dataset_reader = None
+2025-03-24 13:21:27,584 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.dataset_readers.dataset_reader.DatasetReader'> from params {'source_token_indexer': {'tokens': {'do_lowercase': False, 'model_name': './roberta', 'type': 'pretrained_transformer'}}, 'target_token_indexer': {'tokens': {'type': 'single_id'}}, 'tokenizer': {'word_splitter': {'type': 'just_spaces'}}, 'type': 's2s_manual_reader'} and extras set()
+2025-03-24 13:21:27,584 - INFO - allennlp.common.params - dataset_reader.type = s2s_manual_reader
+2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class <class 'utils.SeqReader'> from params {'source_token_indexer': {'tokens': {'do_lowercase': False, 'model_name': './roberta', 'type': 'pretrained_transformer'}}, 'target_token_indexer': {'tokens': {'type': 'single_id'}}, 'tokenizer': {'word_splitter': {'type': 'just_spaces'}}} and extras set()
+2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.tokenizers.tokenizer.Tokenizer'> from params {'word_splitter': {'type': 'just_spaces'}} and extras set()
+2025-03-24 13:21:27,585 - INFO - allennlp.common.params - dataset_reader.tokenizer.type = word
+2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.tokenizers.word_tokenizer.WordTokenizer'> from params {'word_splitter': {'type': 'just_spaces'}} and extras set()
+2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.tokenizers.word_splitter.WordSplitter'> from params {'type': 'just_spaces'} and extras set()
+2025-03-24 13:21:27,585 - INFO - allennlp.common.params - dataset_reader.tokenizer.word_splitter.type = just_spaces
+2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.tokenizers.word_splitter.JustSpacesWordSplitter'> from params {} and extras set()
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.tokenizer.start_tokens = None
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.tokenizer.end_tokens = None
+2025-03-24 13:21:27,586 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.token_indexers.token_indexer.TokenIndexer'> from params {'do_lowercase': False, 'model_name': './roberta', 'type': 'pretrained_transformer'} and extras set()
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.type = pretrained_transformer
+2025-03-24 13:21:27,586 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.token_indexers.pretrained_transformer_indexer.PretrainedTransformerIndexer'> from params {'do_lowercase': False, 'model_name': './roberta'} and extras set()
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.model_name = ./roberta
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.do_lowercase = False
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.namespace = tags
+2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.token_min_padding_length = 0
+2025-03-24 13:21:27,586 - INFO - pytorch_transformers.tokenization_utils - Model name './roberta' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming './roberta' is a path or url to a directory containing tokenizer files.
+2025-03-24 13:21:27,588 - INFO - pytorch_transformers.tokenization_utils - loading file ./roberta/vocab.txt
+2025-03-24 13:21:27,588 - INFO - pytorch_transformers.tokenization_utils - loading file ./roberta/added_tokens.json
+2025-03-24 13:21:27,588 - INFO - pytorch_transformers.tokenization_utils - loading file ./roberta/special_tokens_map.json
+2025-03-24 13:21:27,611 - INFO - allennlp.data.token_indexers.pretrained_transformer_indexer - Using token indexer padding value of 0
+2025-03-24 13:21:27,611 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.token_indexers.token_indexer.TokenIndexer'> from params {'type': 'single_id'} and extras set()
+2025-03-24 13:21:27,611 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.type = single_id
+2025-03-24 13:21:27,611 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.token_indexers.single_id_token_indexer.SingleIdTokenIndexer'> from params {} and extras set()
+2025-03-24 13:21:27,611 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.namespace = tokens
+2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.lowercase_tokens = False
+2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.start_tokens = None
+2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.end_tokens = None
+2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.token_min_padding_length = 0
+2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.model_name = None
+2025-03-24 13:21:27,613 - INFO - allennlp.common.params - train_data_path = ./GeoQA-Data/GeoQA-Pro/pro_train.pk
+2025-03-24 13:21:27,613 - INFO - allennlp.training.util - Reading training data from ./GeoQA-Data/GeoQA-Pro/pro_train.pk
+2025-03-24 13:21:32,260 - INFO - allennlp.common.params - validation_data_path = ./GeoQA-Data/GeoQA-Pro/pro_dev.pk
+2025-03-24 13:21:32,261 - INFO - allennlp.training.util - Reading validation data from ./GeoQA-Data/GeoQA-Pro/pro_dev.pk
+2025-03-24 13:21:33,170 - INFO - allennlp.common.params - test_data_path = ./GeoQA-Data/GeoQA-Pro/pro_test.pk
+2025-03-24 13:21:33,170 - INFO - allennlp.training.util - Reading test data from ./GeoQA-Data/GeoQA-Pro/pro_test.pk
+2025-03-24 13:21:34,300 - INFO - allennlp.training.trainer_pieces - From dataset instances, validation, test, train will be considered for vocabulary creation.
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.type = None
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.extend = False
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.directory_path = None
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.min_count = None
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels')
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.pretrained_files = {}
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False
+2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.tokens_to_add = None
+2025-03-24 13:21:34,301 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.
+2025-03-24 13:21:34,382 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.models.model.Model'> from params {'beam_size': 10, 'encoder': {'dropout': 0.5, 'emb_dim': 768, 'hid_dim': 512, 'input_dim': 21128}, 'knowledge_points_ratio': 0, 'max_decoding_steps': 16, 'resnet_pretrained': './', 'scheduled_sampling_ratio': 0, 'source_embedder': {'token_embedders': {}}, 'target_embedding_dim': 512, 'type': 'geo_s2s'} and extras {'vocab'}
+2025-03-24 13:21:34,382 - INFO - allennlp.common.params - model.type = geo_s2s
+2025-03-24 13:21:34,382 - INFO - allennlp.common.from_params - instantiating class <class 'DPE.SimpleSeq2Seq'> from params {'beam_size': 10, 'encoder': {'dropout': 0.5, 'emb_dim': 768, 'hid_dim': 512, 'input_dim': 21128}, 'knowledge_points_ratio': 0, 'max_decoding_steps': 16, 'resnet_pretrained': './', 'scheduled_sampling_ratio': 0, 'source_embedder': {'token_embedders': {}}, 'target_embedding_dim': 512} and extras {'vocab'}
+2025-03-24 13:21:34,383 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.modules.text_field_embedders.text_field_embedder.TextFieldEmbedder'> from params {'token_embedders': {}} and extras {'vocab'}
+2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.source_embedder.type = basic
+2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.source_embedder.embedder_to_indexer_map = None
+2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.source_embedder.allow_unmatched_keys = False
+2025-03-24 13:21:34,383 - INFO - allennlp.common.from_params - instantiating class <class 'DPE.Encoder'> from params {'dropout': 0.5, 'emb_dim': 768, 'hid_dim': 512, 'input_dim': 21128} and extras {'vocab'}
+2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.encoder.input_dim = 21128
+2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.encoder.emb_dim = 768
+2025-03-24 13:21:34,384 - INFO - allennlp.common.params - model.encoder.hid_dim = 512
+2025-03-24 13:21:34,384 - INFO - allennlp.common.params - model.encoder.dropout = 0.5
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.max_decoding_steps = 16
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.knowledge_points_ratio = 0
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.beam_size = 10
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.target_namespace = tokens
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.target_embedding_dim = 512
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.scheduled_sampling_ratio = 0
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.resnet_pretrained = ./
+2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.use_bleu = True
+2025-03-24 13:21:39,512 - INFO - root - Loading a model trained before embedding extension was implemented; pass an explicit vocab namespace if you want to extend the vocabulary.
+2025-03-24 13:21:39,662 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.iterators.data_iterator.DataIterator'> from params {'batch_size': 32, 'type': 'basic'} and extras set()
+2025-03-24 13:21:39,662 - INFO - allennlp.common.params - iterator.type = basic
+2025-03-24 13:21:39,662 - INFO - allennlp.common.from_params - instantiating class <class 'allennlp.data.iterators.basic_iterator.BasicIterator'> from params {'batch_size': 32} and extras set()
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.batch_size = 32
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.instances_per_epoch = None
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.max_instances_in_memory = None
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.cache_instances = False
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.track_epoch = False
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.maximum_samples_per_batch = None
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - validation_iterator = None
+2025-03-24 13:21:39,663 - INFO - allennlp.common.params - trainer.no_grad = ()
+2025-03-24 13:21:39,671 - INFO - allennlp.training.trainer_pieces - Following parameters are Frozen  (without gradient):
+2025-03-24 13:21:39,671 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.position_embeddings
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_pos_embed
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - Following parameters are Tunable (with gradient):
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - channel_transform.weight
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - channel_transform.bias
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_v.weight
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_v.bias
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_k.weight
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_k.bias
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_q.weight
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_q.bias
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_merge.weight
+2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_merge.bias
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.linear.weight
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.linear.bias
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm1.a_2
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm1.b_2
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm2.a_2
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm2.b_2
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_v.weight
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_v.bias
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_k.weight
+2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_k.bias
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_q.weight
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_q.bias
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_merge.weight
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_merge.bias
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.linear.weight
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.linear.bias
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm1.a_2
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm1.b_2
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm2.a_2
+2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm2.b_2
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_v.weight
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_v.bias
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_k.weight
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_k.bias
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_q.weight
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_q.bias
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_merge.weight
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_merge.bias
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.linear.weight
+2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.linear.bias
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm1.a_2
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm1.b_2
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm2.a_2
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm2.b_2
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_v.weight
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_v.bias
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_k.weight
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_k.bias
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_q.weight
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_q.bias
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_merge.weight
+2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_merge.bias
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.linear.weight
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.linear.bias
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm1.a_2
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm1.b_2
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm2.a_2
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm2.b_2
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_v.weight
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_v.bias
+2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_k.weight
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_k.bias
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_q.weight
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_q.bias
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_merge.weight
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_merge.bias
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.linear.weight
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.linear.bias
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm1.a_2
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm1.b_2
+2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm2.a_2
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm2.b_2
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_v.weight
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_v.bias
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_k.weight
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_k.bias
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_q.weight
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_q.bias
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_merge.weight
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_merge.bias
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.linear.weight
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.linear.bias
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm1.a_2
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm1.b_2
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm2.a_2
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm2.b_2
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_v.weight
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_v.bias
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_k.weight
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_k.bias
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_q.weight
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_q.bias
+2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_v.weight
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_v.bias
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_k.weight
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_k.bias
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_q.weight
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_q.bias
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.linear.weight
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.linear.bias
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm1.a_2
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm1.b_2
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm2.a_2
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm2.b_2
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm3.a_2
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm3.b_2
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_v.weight
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_v.bias
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_k.weight
+2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_k.bias
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_q.weight
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_q.bias
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_v.weight
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_v.bias
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_k.weight
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_k.bias
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_q.weight
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_q.bias
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.linear.weight
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.linear.bias
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm1.a_2
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm1.b_2
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm2.a_2
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm2.b_2
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm3.a_2
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm3.b_2
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_v.weight
+2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_v.bias
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_k.weight
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_k.bias
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_q.weight
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_q.bias
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_v.weight
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_v.bias
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_k.weight
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_k.bias
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_q.weight
+2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_q.bias
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.linear.weight
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.linear.bias
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm1.a_2
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm1.b_2
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm2.a_2
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm2.b_2
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm3.a_2
+2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm3.b_2
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_v.weight
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_v.bias
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_k.weight
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_k.bias
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_q.weight
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_q.bias
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_v.weight
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_v.bias
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_k.weight
+2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_k.bias
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_q.weight
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_q.bias
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.linear.weight
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.linear.bias
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm1.a_2
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm1.b_2
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm2.a_2
+2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm2.b_2
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm3.a_2
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm3.b_2
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_v.weight
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_v.bias
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_k.weight
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_k.bias
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_q.weight
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_q.bias
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_v.weight
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_v.bias
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_k.weight
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_k.bias
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_q.weight
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_q.bias
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.linear.weight
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.linear.bias
+2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm1.a_2
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm1.b_2
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm2.a_2
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm2.b_2
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm3.a_2
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm3.b_2
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_v.weight
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_v.bias
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_k.weight
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_k.bias
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_q.weight
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_q.bias
+2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_v.weight
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_v.bias
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_k.weight
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_k.bias
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_q.weight
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_q.bias
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.linear.weight
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.linear.bias
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm1.a_2
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm1.b_2
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm2.a_2
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm2.b_2
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm3.a_2
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm3.b_2
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.fc.linear.weight
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.fc.linear.bias
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.linear.weight
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.linear.bias
+2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.linear_merge.weight
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_img.linear_merge.bias
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.fc.linear.weight
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.fc.linear.bias
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.linear.weight
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.linear.bias
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.linear_merge.weight
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.linear_merge.bias
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - decode_transform.weight
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - decode_transform.bias
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.word_embeddings.weight
+2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.position_embeddings.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.token_type_embeddings.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.LayerNorm.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.LayerNorm.bias
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.query.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.query.bias
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.key.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.key.bias
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.value.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.value.bias
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.dense.weight
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.dense.bias
+2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.intermediate.dense.weight
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.intermediate.dense.bias
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.dense.weight
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.dense.bias
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.LayerNorm.weight
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.LayerNorm.bias
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.query.weight
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.query.bias
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.key.weight
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.key.bias
+2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.value.weight
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.value.bias
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.dense.weight
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.dense.bias
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.intermediate.dense.weight
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.intermediate.dense.bias
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.dense.weight
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.dense.bias
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.LayerNorm.weight
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.LayerNorm.bias
+2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.query.weight
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.query.bias
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.key.weight
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.key.bias
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.value.weight
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.value.bias
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.dense.weight
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.dense.bias
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.intermediate.dense.weight
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.intermediate.dense.bias
+2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.dense.weight
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.dense.bias
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.LayerNorm.weight
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.LayerNorm.bias
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.query.weight
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.query.bias
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.key.weight
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.key.bias
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.value.weight
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.value.bias
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.dense.weight
+2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.dense.bias
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.intermediate.dense.weight
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.intermediate.dense.bias
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.dense.weight
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.dense.bias
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.LayerNorm.weight
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.LayerNorm.bias
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.query.weight
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.query.bias
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.key.weight
+2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.key.bias
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.value.weight
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.value.bias
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.dense.weight
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.dense.bias
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.intermediate.dense.weight
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.intermediate.dense.bias
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.dense.weight
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.dense.bias
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.LayerNorm.weight
+2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.LayerNorm.bias
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.query.weight
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.query.bias
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.key.weight
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.key.bias
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.value.weight
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.value.bias
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.dense.weight
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.dense.bias
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.intermediate.dense.weight
+2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.intermediate.dense.bias
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.dense.weight
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.dense.bias
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.LayerNorm.weight
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.LayerNorm.bias
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.query.weight
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.query.bias
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.key.weight
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.key.bias
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.value.weight
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.value.bias
+2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.dense.weight
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.dense.bias
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.intermediate.dense.weight
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.intermediate.dense.bias
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.dense.weight
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.dense.bias
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.LayerNorm.weight
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.LayerNorm.bias
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.query.weight
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.query.bias
+2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.key.weight
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.key.bias
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.value.weight
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.value.bias
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.dense.weight
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.dense.bias
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.intermediate.dense.weight
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.intermediate.dense.bias
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.dense.weight
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.dense.bias
+2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.LayerNorm.weight
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.LayerNorm.bias
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.query.weight
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.query.bias
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.key.weight
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.key.bias
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.value.weight
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.value.bias
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.dense.weight
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.dense.bias
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.intermediate.dense.weight
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.intermediate.dense.bias
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.dense.weight
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.dense.bias
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.LayerNorm.weight
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.LayerNorm.bias
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.query.weight
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.query.bias
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.key.weight
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.key.bias
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.value.weight
+2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.value.bias
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.dense.weight
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.dense.bias
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.intermediate.dense.weight
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.intermediate.dense.bias
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.dense.weight
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.dense.bias
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.LayerNorm.weight
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.LayerNorm.bias
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.query.weight
+2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.query.bias
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.key.weight
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.key.bias
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.value.weight
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.value.bias
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.dense.weight
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.dense.bias
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.intermediate.dense.weight
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.intermediate.dense.bias
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.dense.weight
+2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.dense.bias
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.LayerNorm.weight
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.LayerNorm.bias
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.query.weight
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.query.bias
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.key.weight
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.key.bias
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.value.weight
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.value.bias
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.dense.weight
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.dense.bias
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.LayerNorm.weight
+2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.LayerNorm.bias
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.intermediate.dense.weight
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.intermediate.dense.bias
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.dense.weight
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.dense.bias
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.LayerNorm.weight
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.LayerNorm.bias
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.pooler.dense.weight
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.pooler.dense.bias
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.trans.weight
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.trans.bias
+2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.norm.weight
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.norm.bias
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm_embedding.weight
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.weight_ih_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.weight_hh_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.bias_ih_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.bias_hh_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_trans.weight
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_trans.bias
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_norm.weight
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_norm.bias
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.weight_ih_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.weight_hh_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.bias_ih_l0
+2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.bias_hh_l0
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_mlp.weight
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_mlp.bias
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_norm.weight
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_norm.bias
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.weight_ih_l0
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.weight_hh_l0
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.bias_ih_l0
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.bias_hh_l0
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_v.weight
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_v.bias
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_k.weight
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_k.bias
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_q.weight
+2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_q.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_v.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_v.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_k.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_k.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_q.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_q.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.linear.weight
+2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.linear.bias
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm1.a_2
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm1.b_2
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm2.a_2
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm2.b_2
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm3.a_2
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm3.b_2
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_v.weight
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_v.bias
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_k.weight
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_k.bias
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_q.weight
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_q.bias
+2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_merge.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_merge.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_v.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_v.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_k.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_k.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_q.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_q.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_merge.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_merge.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.linear.weight
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.linear.bias
+2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm1.a_2
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm1.b_2
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm2.a_2
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm2.b_2
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm3.a_2
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm3.b_2
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _target_embedder.weight
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.weight_ih
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.weight_hh
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.bias_ih
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.bias_hh
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _output_projection_layer.weight
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _output_projection_layer.bias
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.cls_token
+2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.patch_embeddings.projection.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.patch_embeddings.projection.bias
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.query.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.query.bias
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.key.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.key.bias
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.value.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.value.bias
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.output.dense.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.output.dense.bias
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.intermediate.dense.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.intermediate.dense.bias
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.output.dense.weight
+2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.output.dense.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_before.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_before.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_after.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_after.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.query.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.query.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.key.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.key.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.value.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.value.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.output.dense.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.output.dense.bias
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.intermediate.dense.weight
+2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.intermediate.dense.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.output.dense.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.output.dense.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_before.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_before.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_after.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_after.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.query.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.query.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.key.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.key.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.value.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.value.bias
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.output.dense.weight
+2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.output.dense.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.intermediate.dense.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.intermediate.dense.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.output.dense.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.output.dense.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_before.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_before.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_after.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_after.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.query.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.query.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.key.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.key.bias
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.value.weight
+2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.value.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.output.dense.weight
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.output.dense.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.intermediate.dense.weight
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.intermediate.dense.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.output.dense.weight
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.output.dense.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_before.weight
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_before.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_after.weight
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_after.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.query.weight
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.query.bias
+2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.key.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.key.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.value.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.value.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.output.dense.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.output.dense.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.intermediate.dense.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.intermediate.dense.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.output.dense.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.output.dense.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_before.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_before.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_after.weight
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_after.bias
+2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.query.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.query.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.key.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.key.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.value.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.value.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.output.dense.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.output.dense.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.intermediate.dense.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.intermediate.dense.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.output.dense.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.output.dense.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_before.weight
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_before.bias
+2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_after.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_after.bias
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.query.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.query.bias
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.key.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.key.bias
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.value.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.value.bias
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.output.dense.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.output.dense.bias
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.intermediate.dense.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.intermediate.dense.bias
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.output.dense.weight
+2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.output.dense.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_before.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_before.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_after.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_after.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.query.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.query.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.key.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.key.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.value.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.value.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.output.dense.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.output.dense.bias
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.intermediate.dense.weight
+2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.intermediate.dense.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.output.dense.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.output.dense.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_before.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_before.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_after.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_after.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.query.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.query.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.key.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.key.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.value.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.value.bias
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.output.dense.weight
+2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.output.dense.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.intermediate.dense.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.intermediate.dense.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.output.dense.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.output.dense.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_before.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_before.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_after.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_after.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.query.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.query.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.key.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.key.bias
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.value.weight
+2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.value.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.output.dense.weight
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.output.dense.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.intermediate.dense.weight
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.intermediate.dense.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.output.dense.weight
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.output.dense.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_before.weight
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_before.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_after.weight
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_after.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.query.weight
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.query.bias
+2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.key.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.key.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.value.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.value.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.output.dense.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.output.dense.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.intermediate.dense.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.intermediate.dense.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.output.dense.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.output.dense.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_before.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_before.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_after.weight
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_after.bias
+2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.query.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.query.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.key.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.key.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.value.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.value.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.output.dense.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.output.dense.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.intermediate.dense.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.intermediate.dense.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.output.dense.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.output.dense.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_before.weight
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_before.bias
+2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_after.weight
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_after.bias
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.vit.layernorm.weight
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.vit.layernorm.bias
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.mask_token
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_embed.weight
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_embed.bias
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.query.weight
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.query.bias
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.key.weight
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.key.bias
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.value.weight
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.value.bias
+2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.output.dense.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.output.dense.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.intermediate.dense.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.intermediate.dense.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.output.dense.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.output.dense.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_before.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_before.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_after.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_after.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.query.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.query.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.key.weight
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.key.bias
+2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.value.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.value.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.output.dense.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.output.dense.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.intermediate.dense.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.intermediate.dense.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.output.dense.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.output.dense.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_before.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_before.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_after.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_after.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.query.weight
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.query.bias
+2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.key.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.key.bias
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.value.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.value.bias
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.output.dense.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.output.dense.bias
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.intermediate.dense.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.intermediate.dense.bias
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.output.dense.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.output.dense.bias
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_before.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_before.bias
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_after.weight
+2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_after.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.query.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.query.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.key.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.key.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.value.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.value.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.output.dense.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.output.dense.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.intermediate.dense.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.intermediate.dense.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.output.dense.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.output.dense.bias
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_before.weight
+2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_before.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_after.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_after.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.query.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.query.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.key.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.key.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.value.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.value.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.output.dense.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.output.dense.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.intermediate.dense.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.intermediate.dense.bias
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.output.dense.weight
+2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.output.dense.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_before.weight
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_before.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_after.weight
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_after.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.query.weight
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.query.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.key.weight
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.key.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.value.weight
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.value.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.output.dense.weight
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.output.dense.bias
+2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.intermediate.dense.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.intermediate.dense.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.output.dense.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.output.dense.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_before.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_before.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_after.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_after.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.query.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.query.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.key.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.key.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.value.weight
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.value.bias
+2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.output.dense.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.output.dense.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.intermediate.dense.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.intermediate.dense.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.output.dense.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.output.dense.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_before.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_before.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_after.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_after.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.query.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.query.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.key.weight
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.key.bias
+2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.value.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.value.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.output.dense.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.output.dense.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.intermediate.dense.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.intermediate.dense.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.output.dense.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.output.dense.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_before.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_before.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_after.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_after.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_norm.weight
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_norm.bias
+2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_pred.weight
+2025-03-24 13:21:39,742 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_pred.bias
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.patience = None
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.validation_metric = +acc
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.shuffle = True
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.num_epochs = 100
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.cuda_device = 0
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.grad_norm = 10
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.grad_clipping = None
+2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.momentum_scheduler = None
+2025-03-24 13:21:44,607 - INFO - allennlp.common.params - trainer.optimizer.type = adam
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: 
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.0.1.lr = 1e-05
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: 
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.1.1.lr = 1e-05
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: 
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.2.1.lr = 2e-05
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: 
+2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.3.1.lr = 0.001
+2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Done constructing parameter groups.
+2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Group 0: ['mcan.dec_list.2.norm2.a_2', 'mcan.enc_list.0.ffn.mlp.fc.linear.weight', 'mcan.enc_list.4.mhatt.linear_k.bias', 'mcan.enc_list.3.ffn.mlp.fc.linear.weight', 'mcan.dec_list.2.norm2.b_2', 'mcan.enc_list.2.norm2.a_2', '_encoder.merge_att.sga_list.0.mhatt2.linear_merge.weight', 'mcan.enc_list.5.mhatt.linear_k.weight', 'mcan.dec_list.1.ffn.mlp.fc.linear.bias', 'mcan.dec_list.5.ffn.mlp.linear.weight', 'mcan.dec_list.1.norm3.a_2', 'mcan.dec_list.0.mhatt2.linear_v.bias', 'mcan.enc_list.5.ffn.mlp.linear.bias', 'mcan.dec_list.4.mhatt2.linear_k.bias', '_encoder.merge_att.sga_list.1.mhatt1.linear_v.bias', '_encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_q.weight', 'mcan.enc_list.2.ffn.mlp.fc.linear.bias', 'mcan.dec_list.2.mhatt2.linear_merge.bias', 'mcan.enc_list.5.mhatt.linear_v.bias', 'mcan.dec_list.5.mhatt2.linear_k.bias', 'mcan.dec_list.4.norm3.b_2', 'mcan.dec_list.0.norm2.b_2', 'mcan.dec_list.4.ffn.mlp.linear.weight', '_encoder.merge_att.sga_list.0.mhatt1.linear_q.weight', 'mcan.dec_list.3.mhatt2.linear_merge.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_k.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_k.bias', '_encoder.merge_att.sga_list.0.norm3.b_2', 'mcan.dec_list.1.mhatt1.linear_v.weight', 'mcan.enc_list.0.mhatt.linear_q.weight', 'mcan.dec_list.5.mhatt2.linear_q.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_merge.bias', 'attflat_img.mlp.linear.bias', 'mcan.dec_list.1.mhatt2.linear_q.weight', 'mcan.dec_list.3.norm2.a_2', 'mcan.dec_list.3.ffn.mlp.fc.linear.bias', 'mcan.dec_list.4.mhatt1.linear_v.weight', 'mcan.enc_list.3.ffn.mlp.fc.linear.bias', 'mcan.dec_list.5.norm1.a_2', '_encoder.merge_att.sga_list.1.mhatt2.linear_q.bias', 'mcan.enc_list.4.mhatt.linear_q.weight', 'mcan.enc_list.4.mhatt.linear_merge.bias', 'mcan.enc_list.2.mhatt.linear_k.bias', '_encoder.merge_att.sga_list.1.norm2.b_2', '_encoder.merge_att.sga_list.0.norm2.a_2', '_encoder.merge_att.sga_list.1.ffn.mlp.linear.weight', 'mcan.enc_list.5.mhatt.linear_q.bias', 'mcan.enc_list.3.mhatt.linear_k.weight', '_encoder.merge_att.sga_list.0.norm2.b_2', 'mcan.dec_list.4.ffn.mlp.linear.bias', 'mcan.enc_list.2.mhatt.linear_q.weight', 'mcan.enc_list.4.ffn.mlp.linear.bias', 'mcan.dec_list.1.mhatt2.linear_merge.weight', 'mcan.dec_list.3.mhatt1.linear_merge.weight', '_encoder.merge_att.sga_list.0.mhatt1.linear_v.weight', 'mcan.dec_list.2.mhatt1.linear_v.weight', 'mcan.dec_list.5.mhatt2.linear_merge.bias', 'mcan.enc_list.4.mhatt.linear_q.bias', 'mcan.dec_list.2.mhatt2.linear_k.weight', 'mcan.enc_list.4.ffn.mlp.fc.linear.bias', 'mcan.dec_list.4.mhatt2.linear_v.bias', 'mcan.dec_list.0.mhatt1.linear_merge.bias', '_encoder.merge_att.sga_list.1.mhatt1.linear_k.weight', 'mcan.dec_list.0.mhatt2.linear_k.weight', 'mcan.enc_list.1.mhatt.linear_q.bias', 'attflat_lang.mlp.linear.bias', 'mcan.dec_list.2.mhatt2.linear_v.weight', 'mcan.enc_list.3.mhatt.linear_v.bias', '_encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.bias', 'attflat_lang.linear_merge.bias', 'mcan.dec_list.5.mhatt1.linear_k.weight', 'mcan.dec_list.0.norm3.a_2', 'mcan.enc_list.2.mhatt.linear_v.bias', 'mcan.dec_list.1.mhatt2.linear_k.bias', 'mcan.dec_list.2.ffn.mlp.linear.weight', 'mcan.enc_list.1.ffn.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_v.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_k.bias', 'mcan.enc_list.3.norm1.b_2', 'mcan.dec_list.2.mhatt2.linear_merge.weight', '_encoder.merge_att.sga_list.0.norm1.a_2', 'mcan.dec_list.4.mhatt1.linear_merge.weight', 'mcan.dec_list.4.mhatt1.linear_q.weight', 'mcan.dec_list.5.ffn.mlp.fc.linear.weight', 'mcan.enc_list.5.mhatt.linear_merge.weight', 'mcan.dec_list.3.ffn.mlp.linear.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_q.bias', 'mcan.dec_list.2.ffn.mlp.linear.bias', 'mcan.dec_list.2.mhatt1.linear_k.bias', 'mcan.enc_list.1.ffn.mlp.fc.linear.bias', 'mcan.dec_list.3.mhatt2.linear_q.bias', 'mcan.enc_list.3.mhatt.linear_merge.bias', 'mcan.dec_list.2.mhatt1.linear_k.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_v.weight', 'mcan.dec_list.1.mhatt1.linear_merge.weight', 'mcan.enc_list.3.mhatt.linear_q.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_merge.bias', 'mcan.dec_list.5.norm3.a_2', '_encoder.merge_att.sga_list.0.norm1.b_2', 'mcan.enc_list.4.ffn.mlp.fc.linear.weight', 'mcan.dec_list.5.norm2.b_2', 'mcan.enc_list.5.norm1.b_2', 'mcan.enc_list.4.norm1.a_2', 'mcan.dec_list.5.mhatt1.linear_merge.bias', 'mcan.enc_list.4.mhatt.linear_v.bias', 'mcan.dec_list.5.mhatt1.linear_v.weight', 'attflat_lang.mlp.fc.linear.bias', 'mcan.dec_list.4.mhatt2.linear_merge.bias', 'mcan.enc_list.0.norm2.b_2', 'mcan.enc_list.2.mhatt.linear_q.bias', 'mcan.dec_list.5.mhatt1.linear_k.bias', 'mcan.enc_list.0.norm1.a_2', 'mcan.enc_list.1.mhatt.linear_q.weight', 'mcan.enc_list.3.norm2.a_2', 'mcan.dec_list.2.norm3.b_2', 'mcan.dec_list.0.norm1.a_2', 'mcan.dec_list.0.mhatt1.linear_v.bias', 'mcan.dec_list.4.mhatt2.linear_v.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_k.weight', 'mcan.enc_list.0.mhatt.linear_v.bias', '_encoder.merge_att.sga_list.1.norm1.a_2', '_encoder.merge_att.sga_list.0.mhatt1.linear_merge.bias', 'mcan.enc_list.4.mhatt.linear_k.weight', 'mcan.dec_list.1.ffn.mlp.linear.bias', 'mcan.enc_list.5.mhatt.linear_v.weight', 'mcan.enc_list.3.mhatt.linear_k.bias', 'mcan.dec_list.0.ffn.mlp.linear.bias', 'mcan.dec_list.2.mhatt2.linear_q.bias', 'mcan.dec_list.0.mhatt1.linear_k.bias', 'mcan.dec_list.0.mhatt2.linear_v.weight', 'attflat_img.mlp.linear.weight', 'attflat_img.mlp.fc.linear.weight', 'mcan.enc_list.0.mhatt.linear_merge.bias', 'mcan.dec_list.2.mhatt2.linear_k.bias', 'mcan.enc_list.0.norm1.b_2', 'mcan.dec_list.1.mhatt1.linear_merge.bias', 'mcan.enc_list.4.ffn.mlp.linear.weight', 'mcan.enc_list.2.norm1.b_2', 'mcan.dec_list.1.mhatt1.linear_v.bias', 'mcan.enc_list.5.ffn.mlp.fc.linear.weight', 'mcan.dec_list.4.mhatt2.linear_q.weight', 'mcan.dec_list.2.mhatt1.linear_merge.weight', 'mcan.dec_list.3.mhatt2.linear_q.weight', 'mcan.dec_list.0.mhatt2.linear_merge.bias', 'mcan.dec_list.0.ffn.mlp.fc.linear.weight', 'mcan.dec_list.5.mhatt2.linear_v.bias', 'mcan.dec_list.4.norm2.b_2', 'mcan.dec_list.3.mhatt1.linear_v.bias', 'decode_transform.bias', '_encoder.merge_att.sga_list.0.mhatt2.linear_v.weight', 'mcan.dec_list.0.ffn.mlp.linear.weight', 'mcan.dec_list.0.ffn.mlp.fc.linear.bias', 'mcan.dec_list.5.mhatt2.linear_merge.weight', 'mcan.dec_list.1.mhatt1.linear_k.weight', 'mcan.dec_list.4.mhatt2.linear_k.weight', 'mcan.dec_list.1.norm3.b_2', 'mcan.dec_list.0.norm1.b_2', 'attflat_img.linear_merge.weight', 'mcan.dec_list.3.norm3.b_2', 'mcan.enc_list.3.mhatt.linear_v.weight', 'mcan.dec_list.3.mhatt2.linear_v.bias', 'mcan.dec_list.3.mhatt1.linear_q.bias', 'mcan.enc_list.3.mhatt.linear_q.bias', 'mcan.enc_list.5.ffn.mlp.fc.linear.bias', 'mcan.dec_list.3.ffn.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_q.weight', 'mcan.dec_list.1.ffn.mlp.fc.linear.weight', 'mcan.enc_list.5.mhatt.linear_merge.bias', 'mcan.dec_list.2.mhatt1.linear_q.bias', 'mcan.enc_list.1.norm1.a_2', 'attflat_lang.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_q.bias', 'mcan.dec_list.1.mhatt2.linear_q.bias', 'mcan.enc_list.1.mhatt.linear_k.bias', 'mcan.dec_list.2.mhatt1.linear_q.weight', 'mcan.dec_list.3.mhatt1.linear_k.bias', 'mcan.enc_list.1.mhatt.linear_merge.weight', 'mcan.enc_list.3.norm1.a_2', 'mcan.dec_list.1.norm2.b_2', 'mcan.dec_list.4.mhatt1.linear_v.bias', 'mcan.enc_list.0.mhatt.linear_k.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_merge.weight', 'mcan.enc_list.4.mhatt.linear_v.weight', '_encoder.merge_att.sga_list.1.norm3.b_2', 'mcan.dec_list.3.ffn.mlp.linear.weight', 'mcan.enc_list.2.norm2.b_2', 'mcan.enc_list.0.norm2.a_2', 'mcan.enc_list.0.mhatt.linear_q.bias', 'mcan.dec_list.2.mhatt2.linear_q.weight', 'mcan.dec_list.4.norm2.a_2', 'mcan.dec_list.0.mhatt1.linear_q.weight', '_encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.weight', 'mcan.enc_list.1.norm2.a_2', 'channel_transform.bias', 'mcan.dec_list.2.norm1.b_2', 'mcan.enc_list.1.ffn.mlp.linear.bias', '_encoder.merge_att.sga_list.0.mhatt2.linear_q.weight', 'mcan.dec_list.1.mhatt2.linear_merge.bias', 'mcan.dec_list.1.norm2.a_2', '_encoder.merge_att.sga_list.0.mhatt1.linear_k.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_v.bias', 'mcan.dec_list.0.norm3.b_2', 'mcan.enc_list.5.mhatt.linear_k.bias', 'attflat_lang.linear_merge.weight', 'mcan.dec_list.5.mhatt1.linear_q.bias', 'mcan.dec_list.3.mhatt2.linear_k.bias', 'mcan.dec_list.0.mhatt1.linear_v.weight', 'mcan.dec_list.5.norm2.a_2', 'mcan.dec_list.4.mhatt1.linear_merge.bias', 'mcan.enc_list.4.norm2.a_2', '_encoder.merge_att.sga_list.1.norm3.a_2', 'mcan.dec_list.5.mhatt2.linear_q.bias', 'mcan.dec_list.4.ffn.mlp.fc.linear.weight', 'decode_transform.weight', 'mcan.dec_list.0.mhatt1.linear_merge.weight', 'mcan.enc_list.5.ffn.mlp.linear.weight', 'mcan.dec_list.2.ffn.mlp.fc.linear.bias', 'mcan.enc_list.1.ffn.mlp.linear.weight', 'channel_transform.weight', 'attflat_img.mlp.fc.linear.bias', 'mcan.enc_list.4.norm2.b_2', 'mcan.enc_list.2.mhatt.linear_merge.weight', 'mcan.dec_list.2.mhatt1.linear_merge.bias', '_encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.bias', 'mcan.enc_list.2.ffn.mlp.linear.bias', 'mcan.dec_list.4.mhatt2.linear_q.bias', 'mcan.dec_list.3.mhatt1.linear_v.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_merge.weight', 'mcan.dec_list.0.mhatt2.linear_k.bias', 'mcan.dec_list.4.mhatt1.linear_k.weight', 'mcan.enc_list.1.norm1.b_2', 'mcan.dec_list.4.mhatt2.linear_merge.weight', 'mcan.dec_list.2.mhatt1.linear_v.bias', 'mcan.dec_list.1.ffn.mlp.linear.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_v.bias', 'mcan.dec_list.0.mhatt2.linear_q.weight', 'mcan.enc_list.3.norm2.b_2', 'mcan.enc_list.4.mhatt.linear_merge.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_k.bias', 'mcan.enc_list.2.ffn.mlp.linear.weight', 'mcan.enc_list.1.mhatt.linear_v.bias', 'mcan.enc_list.1.norm2.b_2', 'mcan.enc_list.3.mhatt.linear_merge.weight', 'mcan.dec_list.3.norm2.b_2', 'mcan.dec_list.5.mhatt2.linear_k.weight', 'mcan.dec_list.5.mhatt1.linear_q.weight', 'mcan.dec_list.4.norm3.a_2', 'mcan.dec_list.0.mhatt1.linear_q.bias', '_encoder.merge_att.sga_list.1.norm2.a_2', 'mcan.dec_list.5.ffn.mlp.linear.bias', 'mcan.dec_list.3.mhatt2.linear_v.weight', 'mcan.dec_list.5.ffn.mlp.fc.linear.bias', 'mcan.enc_list.0.mhatt.linear_merge.weight', 'mcan.enc_list.2.mhatt.linear_k.weight', 'mcan.dec_list.1.mhatt1.linear_q.weight', 'mcan.dec_list.0.mhatt2.linear_q.bias', 'mcan.dec_list.1.norm1.a_2', 'mcan.enc_list.5.norm2.a_2', 'mcan.dec_list.0.mhatt1.linear_k.weight', '_encoder.merge_att.sga_list.0.norm3.a_2', 'mcan.dec_list.1.mhatt2.linear_k.weight', 'mcan.dec_list.4.norm1.b_2', 'mcan.dec_list.3.mhatt1.linear_merge.bias', 'mcan.dec_list.5.norm1.b_2', 'mcan.dec_list.5.mhatt1.linear_merge.weight', 'mcan.enc_list.0.ffn.mlp.linear.bias', 'mcan.enc_list.1.mhatt.linear_k.weight', 'mcan.enc_list.1.mhatt.linear_merge.bias', 'mcan.dec_list.0.mhatt2.linear_merge.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_merge.bias', 'mcan.dec_list.1.mhatt1.linear_q.bias', 'mcan.dec_list.4.mhatt1.linear_q.bias', 'mcan.enc_list.4.norm1.b_2', 'mcan.dec_list.3.norm3.a_2', 'mcan.dec_list.4.mhatt1.linear_k.bias', 'mcan.dec_list.4.ffn.mlp.fc.linear.bias', 'mcan.dec_list.3.norm1.b_2', 'mcan.dec_list.2.ffn.mlp.fc.linear.weight', 'mcan.enc_list.5.norm2.b_2', '_encoder.merge_att.sga_list.1.mhatt1.linear_q.bias', 'mcan.enc_list.0.ffn.mlp.fc.linear.bias', 'mcan.enc_list.2.norm1.a_2', 'mcan.enc_list.3.ffn.mlp.linear.weight', 'mcan.dec_list.5.mhatt1.linear_v.bias', '_encoder.merge_att.sga_list.1.norm1.b_2', 'mcan.dec_list.5.norm3.b_2', 'mcan.enc_list.0.mhatt.linear_v.weight', '_encoder.merge_att.sga_list.0.ffn.mlp.linear.bias', '_encoder.merge_att.sga_list.0.ffn.mlp.linear.weight', 'mcan.dec_list.5.mhatt2.linear_v.weight', 'mcan.enc_list.5.norm1.a_2', 'mcan.dec_list.0.norm2.a_2', 'mcan.dec_list.2.norm1.a_2', 'mcan.dec_list.3.mhatt1.linear_q.weight', 'mcan.enc_list.5.mhatt.linear_q.weight', 'mcan.dec_list.3.mhatt2.linear_k.weight', 'mcan.enc_list.2.mhatt.linear_v.weight', 'mcan.dec_list.1.mhatt2.linear_v.weight', 'mcan.enc_list.0.ffn.mlp.linear.weight', 'mcan.dec_list.1.norm1.b_2', 'mcan.dec_list.3.norm1.a_2', 'mcan.enc_list.0.mhatt.linear_k.weight', 'mcan.dec_list.3.mhatt1.linear_k.weight', 'attflat_img.linear_merge.bias', '_encoder.merge_att.sga_list.1.mhatt2.linear_k.weight', 'mcan.dec_list.2.norm3.a_2', '_encoder.merge_att.sga_list.1.mhatt2.linear_v.bias', 'attflat_lang.mlp.linear.weight', 'mcan.enc_list.2.mhatt.linear_merge.bias', '_encoder.merge_att.sga_list.1.mhatt2.linear_merge.weight', 'mcan.dec_list.1.mhatt1.linear_k.bias', 'mcan.dec_list.2.mhatt2.linear_v.bias', '_encoder.merge_att.sga_list.1.ffn.mlp.linear.bias', 'mcan.enc_list.3.ffn.mlp.linear.bias', 'mcan.dec_list.4.norm1.a_2', 'mcan.enc_list.1.mhatt.linear_v.weight', 'mcan.dec_list.3.mhatt2.linear_merge.weight', 'mcan.enc_list.2.ffn.mlp.fc.linear.weight', 'mcan.dec_list.1.mhatt2.linear_v.bias'], {'lr': 1e-05}
+2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Group 1: [], {'lr': 1e-05}
+2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Group 2: ['_encoder.embedding.encoder.layer.5.attention.self.value.bias', '_encoder.embedding.encoder.layer.4.attention.self.query.weight', '_encoder.embedding.encoder.layer.4.attention.output.LayerNorm.weight', '_encoder.embedding.pooler.dense.bias', '_encoder.embedding.encoder.layer.10.attention.self.value.weight', '_encoder.embedding.encoder.layer.3.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.7.attention.self.query.bias', '_encoder.embedding.encoder.layer.8.attention.self.query.bias', '_encoder.embedding.encoder.layer.6.attention.output.dense.weight', '_encoder.embedding.encoder.layer.4.attention.self.key.weight', '_encoder.embedding.encoder.layer.5.intermediate.dense.weight', '_encoder.embedding.encoder.layer.10.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.3.intermediate.dense.bias', '_encoder.embedding.encoder.layer.1.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.output.dense.weight', '_encoder.embedding.encoder.layer.6.intermediate.dense.bias', '_encoder.embedding.encoder.layer.6.attention.self.query.weight', '_encoder.embedding.encoder.layer.6.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.7.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.10.attention.output.dense.bias', '_encoder.embedding.encoder.layer.10.attention.self.key.weight', '_encoder.embedding.encoder.layer.10.intermediate.dense.bias', '_encoder.embedding.encoder.layer.0.intermediate.dense.bias', '_encoder.embedding.encoder.layer.10.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.attention.self.key.weight', '_encoder.embedding.encoder.layer.7.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.9.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.11.attention.self.key.weight', '_encoder.embedding.encoder.layer.1.attention.output.dense.weight', '_encoder.embedding.encoder.layer.5.intermediate.dense.bias', '_encoder.embedding.encoder.layer.3.attention.self.key.weight', '_encoder.embedding.encoder.layer.6.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.7.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.5.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.1.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.output.dense.bias', '_encoder.embedding.encoder.layer.1.attention.self.key.bias', '_encoder.embedding.encoder.layer.9.attention.self.query.bias', '_encoder.embedding.embeddings.position_embeddings.weight', '_encoder.embedding.encoder.layer.5.attention.self.value.weight', '_encoder.embedding.encoder.layer.2.intermediate.dense.bias', '_encoder.embedding.encoder.layer.5.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.10.attention.self.query.weight', '_encoder.embedding.encoder.layer.0.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.5.attention.self.key.weight', '_encoder.embedding.encoder.layer.1.attention.self.key.weight', '_encoder.embedding.encoder.layer.4.attention.output.dense.bias', '_encoder.embedding.encoder.layer.4.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.4.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.self.query.weight', '_encoder.embedding.encoder.layer.10.attention.self.query.bias', '_encoder.embedding.encoder.layer.10.attention.self.key.bias', '_encoder.embedding.encoder.layer.11.attention.self.key.bias', '_encoder.embedding.encoder.layer.4.intermediate.dense.weight', '_encoder.embedding.encoder.layer.6.attention.self.key.bias', '_encoder.embedding.encoder.layer.6.output.dense.weight', '_encoder.embedding.encoder.layer.1.output.dense.weight', '_encoder.embedding.encoder.layer.2.attention.self.key.weight', '_encoder.embedding.encoder.layer.2.attention.self.query.weight', '_encoder.embedding.encoder.layer.3.attention.self.key.bias', '_encoder.embedding.encoder.layer.0.attention.self.query.bias', '_encoder.embedding.encoder.layer.9.attention.output.dense.weight', '_encoder.embedding.encoder.layer.2.attention.self.value.weight', '_encoder.embedding.encoder.layer.7.intermediate.dense.bias', '_encoder.embedding.encoder.layer.1.attention.self.value.weight', '_encoder.embedding.encoder.layer.10.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.6.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.self.query.bias', '_encoder.embedding.encoder.layer.1.attention.self.value.bias', '_encoder.embedding.encoder.layer.3.attention.output.dense.weight', '_encoder.embedding.encoder.layer.1.attention.self.query.weight', '_encoder.embedding.encoder.layer.2.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.6.attention.self.value.bias', '_encoder.embedding.encoder.layer.11.output.dense.weight', '_encoder.embedding.encoder.layer.4.output.dense.bias', '_encoder.embedding.embeddings.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.attention.self.query.bias', '_encoder.embedding.encoder.layer.11.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.self.query.weight', '_encoder.embedding.encoder.layer.2.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.9.attention.self.value.weight', '_encoder.embedding.encoder.layer.11.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.10.attention.self.value.bias', '_encoder.embedding.encoder.layer.11.attention.output.dense.bias', '_encoder.embedding.encoder.layer.5.attention.output.dense.weight', '_encoder.embedding.encoder.layer.2.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.5.attention.self.key.bias', '_encoder.embedding.encoder.layer.8.attention.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.output.dense.weight', '_encoder.embedding.encoder.layer.5.output.dense.weight', '_encoder.embedding.embeddings.word_embeddings.weight', '_encoder.embedding.encoder.layer.3.output.dense.bias', '_encoder.embedding.encoder.layer.7.attention.output.dense.weight', '_encoder.embedding.encoder.layer.7.attention.self.value.weight', '_encoder.embedding.encoder.layer.8.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.1.intermediate.dense.bias', '_encoder.embedding.encoder.layer.11.attention.self.value.weight', '_encoder.embedding.encoder.layer.4.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.0.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.attention.self.key.bias', '_encoder.embedding.encoder.layer.3.intermediate.dense.weight', '_encoder.embedding.encoder.layer.6.attention.output.dense.bias', '_encoder.embedding.encoder.layer.9.attention.self.key.weight', '_encoder.embedding.encoder.layer.11.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.9.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.intermediate.dense.weight', '_encoder.embedding.encoder.layer.7.attention.output.dense.bias', '_encoder.embedding.encoder.layer.6.output.dense.bias', '_encoder.embedding.encoder.layer.7.attention.self.query.weight', '_encoder.embedding.encoder.layer.8.intermediate.dense.bias', '_encoder.embedding.encoder.layer.4.attention.self.value.weight', '_encoder.embedding.encoder.layer.1.attention.output.dense.bias', '_encoder.embedding.encoder.layer.3.output.dense.weight', '_encoder.embedding.encoder.layer.4.attention.output.dense.weight', '_encoder.embedding.encoder.layer.6.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.output.dense.weight', '_encoder.embedding.encoder.layer.8.intermediate.dense.weight', '_encoder.embedding.encoder.layer.7.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.attention.self.value.bias', '_encoder.embedding.encoder.layer.3.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.attention.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.self.value.weight', '_encoder.embedding.encoder.layer.9.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.2.output.dense.weight', '_encoder.embedding.encoder.layer.1.intermediate.dense.weight', '_encoder.embedding.encoder.layer.9.intermediate.dense.bias', '_encoder.embedding.encoder.layer.8.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.output.dense.weight', '_encoder.embedding.encoder.layer.2.attention.self.value.bias', '_encoder.embedding.encoder.layer.10.attention.output.dense.weight', '_encoder.embedding.encoder.layer.5.attention.self.query.weight', '_encoder.embedding.encoder.layer.10.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.9.output.dense.weight', '_encoder.embedding.encoder.layer.11.intermediate.dense.bias', '_encoder.embedding.embeddings.token_type_embeddings.weight', '_encoder.embedding.encoder.layer.5.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.2.attention.output.dense.weight', '_encoder.embedding.encoder.layer.7.attention.self.key.weight', '_encoder.embedding.encoder.layer.7.attention.self.key.bias', '_encoder.embedding.encoder.layer.8.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.9.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.6.attention.self.key.weight', '_encoder.embedding.encoder.layer.1.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.10.output.dense.weight', '_encoder.embedding.encoder.layer.4.attention.self.query.bias', '_encoder.embedding.encoder.layer.0.attention.self.key.weight', '_encoder.embedding.encoder.layer.9.attention.output.dense.bias', '_encoder.embedding.encoder.layer.1.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.5.output.dense.bias', '_encoder.embedding.encoder.layer.6.attention.self.value.weight', '_encoder.embedding.encoder.layer.3.attention.self.query.bias', '_encoder.embedding.encoder.layer.3.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.4.intermediate.dense.bias', '_encoder.embedding.encoder.layer.9.intermediate.dense.weight', '_encoder.embedding.pooler.dense.weight', '_encoder.embedding.encoder.layer.2.intermediate.dense.weight', '_encoder.embedding.encoder.layer.3.attention.self.query.weight', '_encoder.embedding.encoder.layer.6.attention.self.query.bias', '_encoder.embedding.encoder.layer.8.attention.self.value.weight', '_encoder.embedding.encoder.layer.3.attention.self.value.weight', '_encoder.embedding.encoder.layer.7.output.dense.weight', '_encoder.embedding.encoder.layer.0.output.dense.bias', '_encoder.embedding.encoder.layer.8.attention.self.query.weight', '_encoder.embedding.encoder.layer.9.attention.self.value.bias', '_encoder.embedding.encoder.layer.3.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.2.output.dense.bias', '_encoder.embedding.encoder.layer.11.intermediate.dense.weight', '_encoder.embedding.encoder.layer.8.attention.self.key.bias', '_encoder.embedding.embeddings.LayerNorm.weight', '_encoder.embedding.encoder.layer.4.attention.self.key.bias', '_encoder.embedding.encoder.layer.10.intermediate.dense.weight', '_encoder.embedding.encoder.layer.1.output.dense.bias', '_encoder.embedding.encoder.layer.7.attention.self.value.bias', '_encoder.embedding.encoder.layer.5.attention.self.query.bias', '_encoder.embedding.encoder.layer.5.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.7.intermediate.dense.weight', '_encoder.embedding.encoder.layer.1.attention.self.query.bias', '_encoder.embedding.encoder.layer.7.output.dense.bias', '_encoder.embedding.encoder.layer.6.intermediate.dense.weight', '_encoder.embedding.encoder.layer.11.attention.self.value.bias', '_encoder.embedding.encoder.layer.9.attention.self.query.weight', '_encoder.embedding.encoder.layer.9.attention.self.key.bias', '_encoder.embedding.encoder.layer.3.attention.output.dense.bias', '_encoder.embedding.encoder.layer.9.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.attention.self.key.bias', '_encoder.embedding.encoder.layer.0.attention.output.dense.bias', '_encoder.embedding.encoder.layer.3.attention.self.value.bias', '_encoder.embedding.encoder.layer.8.attention.output.dense.weight', '_encoder.embedding.encoder.layer.11.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.attention.self.value.bias', '_encoder.embedding.encoder.layer.4.attention.self.value.bias', '_encoder.embedding.encoder.layer.5.attention.output.dense.bias', '_encoder.embedding.encoder.layer.10.output.dense.bias', '_encoder.embedding.encoder.layer.4.output.dense.weight'], {'lr': 2e-05}
+2025-03-24 13:21:44,625 - INFO - allennlp.training.optimizers - Group 3: ['_encoder.lstm_embedding.weight', '_encoder.concat_trans.weight', '_encoder.concat_trans.bias', '_encoder.trans.weight', '_encoder.norm.weight', '_encoder.concat_norm.weight', '_encoder.norm.bias', '_encoder.concat_norm.bias', '_encoder.trans.bias'], {'lr': 0.001}
+2025-03-24 13:21:44,625 - INFO - allennlp.training.optimizers - Group 4: ['vit_model.decoder.decoder_layers.5.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.3.intermediate.dense.weight', 'vit_model.vit.encoder.layer.5.attention.output.dense.bias', 'vit_model.vit.encoder.layer.10.output.dense.bias', 'vit_model.decoder.decoder_layers.3.layernorm_before.weight', 'vit_model.vit.encoder.layer.10.layernorm_after.bias', 'vit_model.decoder.decoder_layers.1.layernorm_before.bias', 'vit_model.vit.encoder.layer.7.intermediate.dense.weight', 'vit_model.vit.encoder.layer.2.intermediate.dense.weight', '_encoder.lstm.bias_ih_l0', 'vit_model.decoder.decoder_layers.0.intermediate.dense.bias', 'vit_model.vit.encoder.layer.3.attention.output.dense.weight', '_output_projection_layer.weight', '_encoder.lstm.bias_hh_l0', 'vit_model.decoder.decoder_layers.4.attention.attention.value.bias', 'vit_model.vit.encoder.layer.4.layernorm_before.weight', 'vit_model.decoder.decoder_layers.4.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.query.weight', 'vit_model.vit.encoder.layer.8.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.query.weight', 'vit_model.vit.encoder.layer.2.attention.attention.value.bias', 'vit_model.vit.encoder.layer.2.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.0.layernorm_before.bias', 'vit_model.vit.encoder.layer.6.attention.attention.value.weight', 'vit_model.vit.encoder.layer.5.attention.attention.value.bias', 'vit_model.vit.encoder.layer.4.intermediate.dense.bias', 'vit_model.vit.encoder.layer.8.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.6.output.dense.bias', 'vit_model.vit.encoder.layer.11.attention.attention.query.bias', 'vit_model.vit.encoder.layer.7.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.3.output.dense.bias', 'vit_model.vit.encoder.layer.3.intermediate.dense.weight', 'vit_model.vit.encoder.layer.7.layernorm_after.bias', '_encoder.merge_lstm.weight_ih_l0', 'vit_model.decoder.decoder_layers.7.attention.attention.query.bias', 'vit_model.vit.encoder.layer.9.output.dense.bias', 'vit_model.decoder.decoder_layers.6.attention.output.dense.weight', '_encoder.early_gru.weight_ih_l0', 'vit_model.vit.encoder.layer.3.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.output.dense.weight', '_decoder_cell.weight_ih', 'vit_model.decoder.decoder_layers.4.output.dense.bias', 'vit_model.decoder.decoder_layers.1.layernorm_after.bias', 'vit_model.vit.encoder.layer.3.output.dense.bias', 'vit_model.vit.embeddings.patch_embeddings.projection.bias', 'vit_model.vit.encoder.layer.6.output.dense.bias', 'vit_model.vit.encoder.layer.10.layernorm_after.weight', 'vit_model.vit.encoder.layer.10.layernorm_before.bias', 'vit_model.vit.encoder.layer.11.attention.attention.key.bias', 'vit_model.vit.encoder.layer.4.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.layernorm_before.bias', 'vit_model.decoder.decoder_layers.7.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.5.output.dense.weight', 'vit_model.vit.encoder.layer.2.attention.attention.query.weight', '_target_embedder.weight', 'vit_model.vit.encoder.layer.0.attention.output.dense.bias', 'vit_model.vit.encoder.layer.1.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.6.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.value.bias', 'vit_model.vit.encoder.layer.5.output.dense.weight', 'vit_model.decoder.decoder_layers.6.output.dense.weight', '_output_projection_layer.bias', 'vit_model.decoder.decoder_layers.2.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.7.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.5.layernorm_after.weight', 'vit_model.vit.encoder.layer.3.layernorm_before.weight', 'vit_model.decoder.decoder_layers.0.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.4.layernorm_after.weight', '_encoder.merge_lstm.weight_hh_l0', 'vit_model.decoder.decoder_layers.6.layernorm_after.bias', 'vit_model.vit.encoder.layer.2.output.dense.bias', 'vit_model.decoder.decoder_layers.1.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.2.attention.attention.query.weight', 'vit_model.vit.encoder.layer.8.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.0.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.7.intermediate.dense.weight', 'vit_model.vit.encoder.layer.4.attention.output.dense.weight', 'vit_model.vit.encoder.layer.5.attention.attention.query.bias', 'vit_model.vit.encoder.layer.1.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.1.output.dense.bias', 'vit_model.decoder.decoder_layers.1.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.1.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.0.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.0.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.0.output.dense.weight', 'vit_model.decoder.decoder_layers.7.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.2.attention.attention.value.weight', 'vit_model.vit.encoder.layer.11.layernorm_before.bias', 'vit_model.vit.encoder.layer.6.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.5.intermediate.dense.weight', 'vit_model.vit.encoder.layer.4.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.7.intermediate.dense.bias', 'vit_model.vit.encoder.layer.0.attention.attention.query.bias', 'vit_model.vit.encoder.layer.2.intermediate.dense.bias', 'vit_model.vit.encoder.layer.11.attention.output.dense.bias', 'vit_model.vit.encoder.layer.11.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.output.dense.weight', 'vit_model.vit.embeddings.cls_token', 'vit_model.decoder.decoder_layers.7.output.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.key.weight', 'vit_model.vit.encoder.layer.6.layernorm_before.weight', 'vit_model.vit.encoder.layer.7.attention.output.dense.weight', 'vit_model.vit.encoder.layer.4.intermediate.dense.weight', 'vit_model.vit.encoder.layer.1.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.value.bias', 'vit_model.vit.encoder.layer.7.output.dense.weight', 'vit_model.vit.encoder.layer.0.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.6.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.5.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.4.layernorm_before.weight', 'vit_model.vit.encoder.layer.7.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.attention.attention.query.bias', 'vit_model.vit.encoder.layer.11.attention.attention.value.bias', 'vit_model.vit.encoder.layer.5.attention.attention.key.bias', 'vit_model.vit.encoder.layer.8.output.dense.bias', 'vit_model.vit.encoder.layer.7.layernorm_before.bias', 'vit_model.decoder.decoder_layers.6.layernorm_before.weight', 'vit_model.vit.encoder.layer.11.layernorm_after.weight', 'vit_model.decoder.decoder_layers.0.layernorm_after.weight', 'vit_model.decoder.decoder_layers.2.attention.attention.query.bias', 'vit_model.vit.encoder.layer.0.intermediate.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.value.weight', 'vit_model.vit.encoder.layer.1.layernorm_before.bias', 'vit_model.decoder.decoder_layers.7.layernorm_after.bias', 'vit_model.decoder.decoder_layers.7.layernorm_after.weight', 'vit_model.vit.encoder.layer.8.layernorm_after.bias', 'vit_model.vit.encoder.layer.11.intermediate.dense.weight', 'vit_model.vit.encoder.layer.0.attention.attention.value.weight', 'vit_model.vit.encoder.layer.3.attention.attention.value.weight', '_encoder.merge_lstm.bias_hh_l0', '_decoder_cell.weight_hh', 'vit_model.decoder.decoder_layers.1.output.dense.weight', 'vit_model.vit.encoder.layer.3.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.4.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.5.layernorm_before.bias', 'vit_model.decoder.decoder_layers.3.intermediate.dense.bias', 'vit_model.vit.encoder.layer.5.layernorm_before.bias', 'vit_model.decoder.decoder_layers.2.attention.output.dense.weight', 'vit_model.vit.encoder.layer.5.output.dense.bias', 'vit_model.vit.encoder.layer.2.output.dense.weight', 'vit_model.vit.encoder.layer.9.layernorm_after.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.layernorm_before.weight', 'vit_model.vit.encoder.layer.4.attention.attention.value.weight', 'vit_model.vit.encoder.layer.4.layernorm_after.weight', 'vit_model.vit.encoder.layer.4.output.dense.bias', 'vit_model.vit.encoder.layer.4.layernorm_before.bias', 'vit_model.decoder.decoder_layers.0.layernorm_before.weight', 'vit_model.decoder.decoder_layers.1.layernorm_after.weight', 'vit_model.decoder.decoder_layers.3.layernorm_after.bias', 'vit_model.vit.encoder.layer.1.attention.attention.key.weight', 'vit_model.vit.encoder.layer.8.layernorm_before.weight', 'vit_model.vit.encoder.layer.6.layernorm_after.weight', 'vit_model.vit.encoder.layer.8.attention.attention.key.weight', 'vit_model.vit.encoder.layer.3.layernorm_before.bias', 'vit_model.vit.layernorm.bias', 'vit_model.vit.encoder.layer.4.output.dense.weight', 'vit_model.vit.encoder.layer.2.layernorm_after.weight', 'vit_model.decoder.decoder_layers.0.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.1.attention.output.dense.weight', 'vit_model.vit.encoder.layer.10.attention.attention.value.weight', 'vit_model.vit.encoder.layer.4.attention.output.dense.bias', 'vit_model.vit.encoder.layer.4.layernorm_after.bias', 'vit_model.vit.encoder.layer.10.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.6.attention.output.dense.bias', 'vit_model.vit.encoder.layer.6.intermediate.dense.weight', 'vit_model.vit.encoder.layer.11.output.dense.bias', 'vit_model.vit.encoder.layer.8.layernorm_after.weight', 'vit_model.vit.encoder.layer.1.output.dense.bias', 'vit_model.decoder.decoder_layers.7.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.5.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.5.attention.attention.key.bias', 'vit_model.vit.encoder.layer.9.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.5.attention.attention.query.bias', 'vit_model.vit.encoder.layer.5.intermediate.dense.weight', 'vit_model.vit.encoder.layer.3.layernorm_after.weight', 'vit_model.vit.encoder.layer.2.layernorm_before.bias', 'vit_model.vit.encoder.layer.9.intermediate.dense.bias', 'vit_model.vit.encoder.layer.4.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.4.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.key.weight', 'vit_model.vit.encoder.layer.10.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.3.layernorm_after.weight', 'vit_model.decoder.decoder_layers.2.intermediate.dense.bias', 'vit_model.vit.encoder.layer.2.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.value.weight', 'vit_model.vit.encoder.layer.5.intermediate.dense.bias', 'vit_model.vit.encoder.layer.7.layernorm_before.weight', 'vit_model.vit.encoder.layer.1.intermediate.dense.weight', 'vit_model.vit.encoder.layer.0.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.0.attention.attention.query.weight', 'vit_model.vit.encoder.layer.0.attention.attention.query.weight', 'vit_model.vit.encoder.layer.6.attention.attention.value.bias', 'vit_model.vit.encoder.layer.7.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.7.attention.output.dense.bias', '_encoder.early_gru.bias_ih_l0', 'vit_model.decoder.decoder_layers.6.attention.attention.query.bias', 'vit_model.vit.encoder.layer.11.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.1.intermediate.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.key.weight', 'vit_model.vit.encoder.layer.5.attention.attention.key.weight', 'vit_model.vit.encoder.layer.6.output.dense.weight', 'vit_model.vit.encoder.layer.3.output.dense.weight', '_encoder.lstm.weight_ih_l0', 'vit_model.vit.encoder.layer.9.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.key.bias', 'vit_model.vit.encoder.layer.6.layernorm_before.bias', 'vit_model.vit.encoder.layer.0.attention.attention.key.weight', 'vit_model.vit.encoder.layer.0.attention.attention.key.bias', '_decoder_cell.bias_hh', 'vit_model.vit.encoder.layer.7.intermediate.dense.bias', 'vit_model.vit.encoder.layer.10.intermediate.dense.bias', 'vit_model.vit.encoder.layer.3.intermediate.dense.bias', 'vit_model.vit.encoder.layer.11.layernorm_after.bias', 'vit_model.decoder.decoder_layers.2.output.dense.weight', 'vit_model.vit.encoder.layer.10.output.dense.weight', 'vit_model.decoder.decoder_layers.3.layernorm_before.bias', 'vit_model.vit.encoder.layer.1.attention.attention.value.bias', 'vit_model.vit.encoder.layer.1.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.value.weight', '_decoder_cell.bias_ih', 'vit_model.decoder.decoder_layers.5.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.layernorm_after.bias', '_encoder.merge_norm.bias', '_encoder.early_gru.weight_hh_l0', 'vit_model.vit.encoder.layer.1.attention.attention.query.bias', 'vit_model.vit.encoder.layer.9.output.dense.weight', 'vit_model.vit.encoder.layer.11.layernorm_before.weight', 'vit_model.decoder.decoder_layers.7.layernorm_before.weight', 'vit_model.decoder.decoder_pred.bias', 'vit_model.vit.encoder.layer.4.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.4.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.5.attention.output.dense.weight', 'vit_model.vit.encoder.layer.2.attention.attention.key.weight', 'vit_model.vit.encoder.layer.11.output.dense.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.2.output.dense.bias', 'vit_model.vit.encoder.layer.8.output.dense.weight', 'vit_model.decoder.decoder_norm.bias', 'vit_model.decoder.decoder_layers.6.layernorm_before.bias', 'vit_model.vit.encoder.layer.11.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.7.output.dense.weight', 'vit_model.vit.encoder.layer.10.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.2.layernorm_before.weight', 'vit_model.vit.encoder.layer.2.layernorm_before.weight', 'vit_model.decoder.decoder_embed.bias', 'vit_model.vit.encoder.layer.7.attention.attention.query.bias', 'vit_model.vit.encoder.layer.2.attention.output.dense.weight', 'vit_model.vit.encoder.layer.10.layernorm_before.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.key.weight', '_encoder.merge_mlp.bias', 'vit_model.decoder.decoder_layers.1.layernorm_before.weight', 'vit_model.vit.encoder.layer.3.attention.attention.query.weight', 'vit_model.vit.encoder.layer.10.attention.attention.query.weight', 'vit_model.vit.encoder.layer.10.attention.attention.query.bias', 'vit_model.decoder.decoder_pred.weight', 'vit_model.decoder.decoder_layers.1.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.0.attention.attention.value.bias', 'vit_model.vit.encoder.layer.3.attention.attention.key.weight', 'vit_model.vit.encoder.layer.11.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.1.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.0.attention.attention.key.weight', 'vit_model.vit.encoder.layer.10.attention.attention.key.bias', 'vit_model.vit.embeddings.patch_embeddings.projection.weight', 'vit_model.decoder.decoder_layers.7.layernorm_before.bias', '_encoder.merge_mlp.weight', 'vit_model.vit.encoder.layer.8.layernorm_before.bias', 'vit_model.vit.encoder.layer.8.attention.attention.query.weight', 'vit_model.vit.encoder.layer.5.attention.output.dense.weight', 'vit_model.vit.encoder.layer.0.output.dense.weight', 'vit_model.decoder.decoder_layers.5.intermediate.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.layernorm_after.bias', '_encoder.early_gru.bias_hh_l0', 'vit_model.vit.encoder.layer.5.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.0.output.dense.bias', '_encoder.merge_lstm.bias_ih_l0', 'vit_model.vit.encoder.layer.7.layernorm_after.weight', 'vit_model.decoder.decoder_layers.2.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.5.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.5.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.attention.output.dense.weight', 'vit_model.decoder.decoder_embed.weight', 'vit_model.decoder.decoder_layers.0.layernorm_after.bias', 'vit_model.vit.encoder.layer.0.layernorm_before.bias', 'vit_model.decoder.decoder_layers.2.layernorm_before.bias', 'vit_model.vit.encoder.layer.3.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.4.intermediate.dense.bias', 'vit_model.vit.encoder.layer.0.output.dense.bias', 'vit_model.decoder.decoder_layers.2.layernorm_after.weight', 'vit_model.vit.encoder.layer.1.attention.attention.value.weight', 'vit_model.vit.encoder.layer.7.output.dense.bias', 'vit_model.vit.encoder.layer.6.attention.output.dense.bias', 'vit_model.vit.encoder.layer.9.attention.output.dense.weight', 'vit_model.vit.encoder.layer.7.attention.attention.key.weight', '_encoder.lstm.weight_hh_l0', 'vit_model.vit.encoder.layer.5.attention.attention.query.weight', 'vit_model.vit.encoder.layer.0.attention.output.dense.weight', 'vit_model.vit.encoder.layer.0.layernorm_after.bias', 'vit_model.decoder.decoder_layers.2.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.3.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.2.attention.attention.value.bias', 'vit_model.vit.encoder.layer.2.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.intermediate.dense.bias', 'vit_model.vit.encoder.layer.2.attention.attention.query.bias', 'vit_model.vit.encoder.layer.3.attention.attention.query.bias', 'vit_model.vit.encoder.layer.8.attention.attention.value.bias', '_encoder.merge_norm.weight', 'vit_model.vit.encoder.layer.9.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.output.dense.bias', 'vit_model.vit.encoder.layer.10.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.1.attention.attention.query.weight', 'vit_model.vit.encoder.layer.6.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.0.attention.output.dense.weight', 'vit_model.vit.encoder.layer.6.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.1.attention.attention.value.bias', 'vit_model.vit.encoder.layer.9.layernorm_before.bias', 'vit_model.vit.encoder.layer.11.attention.output.dense.weight', 'vit_model.vit.encoder.layer.10.attention.attention.value.bias', 'vit_model.vit.encoder.layer.7.attention.output.dense.bias', 'vit_model.vit.encoder.layer.9.layernorm_after.bias', 'vit_model.decoder.decoder_layers.5.layernorm_before.weight', 'vit_model.vit.encoder.layer.5.layernorm_before.weight', 'vit_model.decoder.decoder_layers.1.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.2.attention.attention.key.bias', 'vit_model.decoder.mask_token', 'vit_model.decoder.decoder_layers.6.layernorm_after.weight', 'vit_model.vit.encoder.layer.5.layernorm_after.weight', 'vit_model.decoder.decoder_layers.4.output.dense.weight', 'vit_model.vit.layernorm.weight', 'vit_model.decoder.decoder_layers.2.attention.attention.key.weight', 'vit_model.vit.encoder.layer.5.layernorm_after.bias', 'vit_model.vit.encoder.layer.0.layernorm_after.weight', 'vit_model.vit.encoder.layer.2.layernorm_after.bias', 'vit_model.vit.encoder.layer.0.layernorm_before.weight', 'vit_model.vit.encoder.layer.9.layernorm_before.weight', 'vit_model.decoder.decoder_layers.5.output.dense.bias', 'vit_model.vit.encoder.layer.1.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.layernorm_after.weight', 'vit_model.decoder.decoder_layers.7.attention.attention.value.weight', 'vit_model.vit.encoder.layer.4.attention.attention.query.bias', 'vit_model.decoder.decoder_norm.weight', 'vit_model.vit.encoder.layer.9.attention.attention.query.bias', 'vit_model.vit.encoder.layer.3.attention.attention.key.bias', 'vit_model.vit.encoder.layer.7.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.7.attention.attention.value.bias'], {}
+2025-03-24 13:21:44,631 - WARNING - allennlp.training.optimizers - When constructing parameter groups,  resnet not match any parameter name
+2025-03-24 13:21:44,631 - WARNING - allennlp.training.optimizers - When constructing parameter groups,  source_embedder not match any parameter name
+2025-03-24 13:21:44,631 - WARNING - allennlp.training.optimizers - When constructing parameter groups,  encoder.concat_trans_ not match any parameter name
+2025-03-24 13:21:44,632 - INFO - allennlp.training.optimizers - Number of trainable parameters: 292199006
+2025-03-24 13:21:44,632 - INFO - allennlp.common.params - trainer.optimizer.infer_type_and_cast = True
+2025-03-24 13:21:44,632 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
+2025-03-24 13:21:44,632 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: 
+2025-03-24 13:21:44,632 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.001
+2025-03-24 13:21:44,633 - INFO - allennlp.common.registrable - instantiating registered subclass adam of <class 'allennlp.training.optimizers.Optimizer'>
+2025-03-24 13:21:44,635 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = reduce_on_plateau
+2025-03-24 13:21:44,635 - INFO - allennlp.common.registrable - instantiating registered subclass reduce_on_plateau of <class 'allennlp.training.learning_rate_schedulers.learning_rate_scheduler.LearningRateScheduler'>
+2025-03-24 13:21:44,635 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
+2025-03-24 13:21:44,635 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: 
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.factor = 0.6
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.mode = max
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.patience = 5
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.num_serialized_models_to_keep = 20
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.keep_serialized_model_every_num_seconds = None
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.model_save_interval = None
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.summary_interval = 100
+2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.histogram_interval = None
+2025-03-24 13:21:44,637 - INFO - allennlp.common.params - trainer.should_log_parameter_statistics = True
+2025-03-24 13:21:44,637 - INFO - allennlp.common.params - trainer.should_log_learning_rate = False
+2025-03-24 13:21:44,637 - INFO - allennlp.common.params - trainer.log_batch_size_period = None
+2025-03-24 13:21:44,637 - WARNING - allennlp.training.trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled
+2025-03-24 13:21:44,784 - INFO - allennlp.training.trainer - Beginning training.
+2025-03-24 13:21:44,784 - INFO - allennlp.training.trainer - Epoch 0/99
+2025-03-24 13:21:44,784 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:21:45,113 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:21:45,114 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:21:45,114 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 1974
+2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:21:45,127 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:23:05,615 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:23:13,876 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:23:13,876 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:23:13,878 - INFO - allennlp.training.tensorboard_writer - loss            |     1.687  |     1.203
+2025-03-24 13:23:13,879 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:23:13,879 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  1974.000  |       N/A
+2025-03-24 13:23:13,880 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:23:13,880 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.233
+2025-03-24 13:23:13,880 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:23:13,881 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:23:13,881 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.197
+2025-03-24 13:23:13,882 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.367
+2025-03-24 13:23:13,882 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:23:13,883 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:23:14,111 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.326816
+2025-03-24 13:23:14,112 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:27:23
+2025-03-24 13:23:14,112 - INFO - allennlp.training.trainer - Epoch 1/99
+2025-03-24 13:23:14,112 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:23:14,418 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:23:14,419 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:23:14,419 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:23:14,419 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:23:14,420 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17628
+2025-03-24 13:23:14,420 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:23:14,420 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:23:14,432 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_divide', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2']], [['g_divide', 'N_1', 'N_2']], [['g_divide', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1']], [['g_divide', 'N_1', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']]]
+2025-03-24 13:24:31,319 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:24:39,564 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:24:39,565 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:24:39,566 - INFO - allennlp.training.tensorboard_writer - loss            |     1.122  |     1.045
+2025-03-24 13:24:39,567 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:24:39,568 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17628.000  |       N/A
+2025-03-24 13:24:39,568 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:24:39,568 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.353
+2025-03-24 13:24:39,569 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:24:39,569 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:24:39,570 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.167
+2025-03-24 13:24:39,570 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.425
+2025-03-24 13:24:39,571 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:24:39,571 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Epoch duration: 0:01:25.512708
+2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:22:47
+2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Epoch 2/99
+2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:24:39,953 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1']]]
+2025-03-24 13:25:59,334 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:26:07,944 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:26:07,944 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:26:07,946 - INFO - allennlp.training.tensorboard_writer - loss            |     1.003  |     0.999
+2025-03-24 13:26:07,946 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:26:07,946 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.281
+2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:26:07,948 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.174
+2025-03-24 13:26:07,948 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.418
+2025-03-24 13:26:07,949 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:26:07,949 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:26:08,006 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.381453
+2025-03-24 13:26:08,006 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:21:50
+2025-03-24 13:26:08,007 - INFO - allennlp.training.trainer - Epoch 3/99
+2025-03-24 13:26:08,007 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:26:08,383 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:26:08,397 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_0']], [['g_divide', 'N_0', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_divide', 'N_1', 'N_2']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'N_2', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'N_1', 'N_0']]]
+selected_programs [[['g_equal', 'N_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_0']]]
+2025-03-24 13:27:26,778 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:27:35,969 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:27:35,969 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:27:35,970 - INFO - allennlp.training.tensorboard_writer - loss            |     0.942  |     0.928
+2025-03-24 13:27:35,971 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:27:35,971 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:27:35,971 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:27:35,972 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.312
+2025-03-24 13:27:35,972 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:27:35,973 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:27:35,973 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.173
+2025-03-24 13:27:35,974 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.417
+2025-03-24 13:27:35,974 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:27:35,974 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.991514
+2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:20:29
+2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Epoch 4/99
+2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:27:36,307 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:27:36,308 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:27:36,308 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:27:36,308 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:27:36,309 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:27:36,309 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:27:36,309 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:27:36,321 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'N_2', 'N_3', 'g_add', 'V_0', 'N_3']], [['g_minus', 'N_2', 'N_3', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']]]
+selected_programs [[['g_double', 'N_0']], [['g_half', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_add', 'V_0', 'N_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['gougu_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 13:28:55,277 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:29:04,926 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:29:04,928 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:29:04,929 - INFO - allennlp.training.tensorboard_writer - loss            |     0.895  |     0.908
+2025-03-24 13:29:04,930 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:29:04,930 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:29:04,930 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:29:04,931 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.324
+2025-03-24 13:29:04,931 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:29:04,931 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:29:04,932 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.156
+2025-03-24 13:29:04,932 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.432
+2025-03-24 13:29:04,932 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:29:04,933 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:29:04,976 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.977988
+2025-03-24 13:29:04,976 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:19:23
+2025-03-24 13:29:04,977 - INFO - allennlp.training.trainer - Epoch 5/99
+2025-03-24 13:29:04,977 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:29:05,303 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:29:05,305 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:29:05,305 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:29:05,317 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_equal', 'N_0']]]
+2025-03-24 13:30:21,718 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:30:29,999 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:30:30,001 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:30:30,002 - INFO - allennlp.training.tensorboard_writer - loss            |     0.845  |     0.866
+2025-03-24 13:30:30,003 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:30:30,004 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:30:30,004 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:30:30,004 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.352
+2025-03-24 13:30:30,005 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:30:30,005 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:30:30,006 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.132
+2025-03-24 13:30:30,006 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.443
+2025-03-24 13:30:30,007 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:30:30,007 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:30:30,058 - INFO - allennlp.training.trainer - Epoch duration: 0:01:25.081708
+2025-03-24 13:30:30,058 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:17:09
+2025-03-24 13:30:30,059 - INFO - allennlp.training.trainer - Epoch 6/99
+2025-03-24 13:30:30,059 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:30:30,411 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:30:30,413 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:30:30,413 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:30:30,425 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:31:44,806 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:31:53,191 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:31:53,191 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:31:53,191 - INFO - allennlp.training.tensorboard_writer - loss            |     0.805  |     0.839
+2025-03-24 13:31:53,192 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:31:53,193 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:31:53,193 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:31:53,194 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.348
+2025-03-24 13:31:53,194 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:31:53,194 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:31:53,195 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.130
+2025-03-24 13:31:53,195 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.444
+2025-03-24 13:31:53,195 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:31:53,196 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.171465
+2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:14:43
+2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Epoch 7/99
+2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:31:53,647 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:31:53,648 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:31:53,648 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:31:53,662 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_add', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_3']], [['gougu_add', 'N_1', 'N_2', 'g_add', 'V_0', 'N_0']], [['g_sin', 'N_2', 'g_divide', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_sin', 'N_2', 'g_mul', 'N_0', 'N_1', 'g_minus', 'V_1', 'V_0']], [['g_equal', 'N_2']], [['g_add', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1', 'g_add', 'V_2', 'N_0']], [['g_half', 'N_2']], [['g_minus', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1']], [['cal_circle_area', 'N_0']], [['cal_circle_area', 'N_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_half', 'N_0', 'g_mul', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+2025-03-24 13:33:08,173 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:33:16,530 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:33:16,531 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:33:16,532 - INFO - allennlp.training.tensorboard_writer - loss            |     0.771  |     0.832
+2025-03-24 13:33:16,533 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:33:16,533 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:33:16,534 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:33:16,534 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.378
+2025-03-24 13:33:16,534 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:33:16,535 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:33:16,535 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.135
+2025-03-24 13:33:16,536 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.458
+2025-03-24 13:33:16,536 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:33:16,537 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.363477
+2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:12:35
+2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Epoch 8/99
+2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:33:16,935 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:33:16,949 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'C_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_equal', 'N_0']], [['g_bili', 'N_1', 'N_2', 'N_3', 'gougu_add', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1', 'g_add', 'V_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_1']]]
+2025-03-24 13:34:30,826 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:34:39,104 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:34:39,104 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:34:39,105 - INFO - allennlp.training.tensorboard_writer - loss            |     0.737  |     0.797
+2025-03-24 13:34:39,106 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:34:39,106 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:34:39,107 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:34:39,107 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.381
+2025-03-24 13:34:39,108 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:34:39,108 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:34:39,108 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.111
+2025-03-24 13:34:39,109 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.483
+2025-03-24 13:34:39,109 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:34:39,110 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:34:39,139 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.545359
+2025-03-24 13:34:39,140 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:10:29
+2025-03-24 13:34:39,140 - INFO - allennlp.training.trainer - Epoch 9/99
+2025-03-24 13:34:39,140 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:34:39,451 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:34:39,453 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:34:39,453 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:34:39,465 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:35:54,133 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:36:03,253 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:36:03,254 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:36:03,255 - INFO - allennlp.training.tensorboard_writer - loss            |     0.718  |     0.801
+2025-03-24 13:36:03,256 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:36:03,256 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:36:03,257 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:36:03,257 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.353
+2025-03-24 13:36:03,257 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:36:03,258 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:36:03,258 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 13:36:03,258 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.464
+2025-03-24 13:36:03,259 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:36:03,260 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Epoch duration: 0:01:24.151439
+2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:08:46
+2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Epoch 10/99
+2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:36:03,619 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:36:03,620 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:36:03,634 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'C_2']], [['g_minus', 'N_1', 'N_0']], [['g_sin', 'N_3', 'g_divide', 'N_4', 'N_4']], [['g_add', 'N_2', 'N_3']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['gougu_minus', 'N_2', 'N_1', 'g_bili', 'V_0', 'N_2', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1', 'N_2']], [['g_minus', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_1', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['cal_circle_area', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['cal_circle_area', 'N_1', 'g_divide', 'N_0', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_half', 'N_0', 'g_mul', 'V_0', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0']], [['g_half', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_mul', 'N_1', 'N_2', 'g_half', 'V_0']]]
+2025-03-24 13:37:23,297 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:37:33,096 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:37:33,099 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:37:33,100 - INFO - allennlp.training.tensorboard_writer - loss            |     0.680  |     0.790
+2025-03-24 13:37:33,100 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:37:33,100 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.431
+2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:37:33,102 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.121
+2025-03-24 13:37:33,102 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.524
+2025-03-24 13:37:33,102 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:37:33,104 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:37:33,146 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.854896
+2025-03-24 13:37:33,146 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:07:53
+2025-03-24 13:37:33,146 - INFO - allennlp.training.trainer - Epoch 11/99
+2025-03-24 13:37:33,147 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:37:33,530 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:38:52,808 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:39:01,396 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:39:01,409 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:39:01,411 - INFO - allennlp.training.tensorboard_writer - loss            |     0.656  |     0.764
+2025-03-24 13:39:01,412 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:39:01,413 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:39:01,414 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:39:01,414 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.408
+2025-03-24 13:39:01,415 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:39:01,416 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:39:01,416 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 13:39:01,417 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.498
+2025-03-24 13:39:01,417 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:39:01,418 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.314099
+2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:06:42
+2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Epoch 12/99
+2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:39:01,787 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:39:01,789 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:39:01,803 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_equal', 'N_1']], [['g_sin', 'N_2', 'g_mul', 'N_3', 'V_0']], [['g_sin', 'N_2', 'g_mul', 'N_0', 'V_0']], [['g_equal', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_3']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_mul', 'N_2', 'V_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_0', 'N_2', 'g_add', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_tan', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_double', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['g_divide', 'N_1', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_mul', 'N_1', 'N_2', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_double', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_0']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]]
+2025-03-24 13:40:20,583 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:40:29,378 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:40:29,380 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:40:29,381 - INFO - allennlp.training.tensorboard_writer - loss            |     0.631  |     0.767
+2025-03-24 13:40:29,382 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:40:29,383 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:40:29,383 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:40:29,384 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.431
+2025-03-24 13:40:29,384 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:40:29,384 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:40:29,385 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.113
+2025-03-24 13:40:29,385 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.511
+2025-03-24 13:40:29,386 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:40:29,386 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.975695
+2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:05:26
+2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Epoch 13/99
+2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:40:29,763 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:40:29,764 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:40:29,779 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'C_2']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['gougu_add', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1']], [['gougu_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_2', 'gougu_minus', 'V_1', 'N_2']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0']]]
+selected_programs [[['g_mul', 'N_1', 'N_3', 'g_mul', 'V_0', 'N_3']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']]]
+2025-03-24 13:41:48,644 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:41:57,084 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:41:57,085 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:41:57,086 - INFO - allennlp.training.tensorboard_writer - loss            |     0.591  |     0.752
+2025-03-24 13:41:57,086 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.504
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.096
+2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.560
+2025-03-24 13:41:57,088 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:41:57,089 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:41:57,145 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.708350
+2025-03-24 13:41:57,145 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:04:07
+2025-03-24 13:41:57,146 - INFO - allennlp.training.trainer - Epoch 14/99
+2025-03-24 13:41:57,146 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:41:57,599 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:41:57,601 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:41:57,613 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_1', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']]]
+2025-03-24 13:43:15,814 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:43:24,259 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:43:24,260 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:43:24,260 - INFO - allennlp.training.tensorboard_writer - loss            |     0.566  |     0.735
+2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.543
+2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.096
+2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.578
+2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:43:24,263 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.165195
+2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:02:43
+2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Epoch 15/99
+2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:43:24,634 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:43:24,636 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:43:24,636 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:43:24,648 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:44:42,871 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:44:51,532 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:44:51,534 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:44:51,535 - INFO - allennlp.training.tensorboard_writer - loss            |     0.533  |     0.725
+2025-03-24 13:44:51,535 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:44:51,535 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:44:51,536 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:44:51,536 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.507
+2025-03-24 13:44:51,537 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.102
+2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.554
+2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:44:51,539 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:44:51,565 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.254279
+2025-03-24 13:44:51,565 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:01:20
+2025-03-24 13:44:51,566 - INFO - allennlp.training.trainer - Epoch 16/99
+2025-03-24 13:44:51,566 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:44:51,919 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:44:51,920 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:44:51,934 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['gougu_minus', 'N_0', 'N_1']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0']]]
+2025-03-24 13:46:10,443 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:46:18,882 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:46:18,883 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:46:18,884 - INFO - allennlp.training.tensorboard_writer - loss            |     0.509  |     0.737
+2025-03-24 13:46:18,884 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:46:18,884 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:46:18,886 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:46:18,886 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.511
+2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.109
+2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.554
+2025-03-24 13:46:18,888 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:46:18,889 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.437937
+2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:59:57
+2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Epoch 17/99
+2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:46:19,355 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:46:19,356 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:46:19,370 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_2', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'gougu_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'gougu_minus', 'N_0', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']]]
+selected_programs [[['g_equal', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_add', 'C_6', 'N_3', 'g_add', 'V_0', 'N_4']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_2']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0']]]
+selected_programs [[['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_equal', 'C_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_2', 'g_double', 'N_1', 'g_minus', 'V_1', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']]]
+2025-03-24 13:47:36,937 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:47:45,532 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:47:45,538 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:47:45,539 - INFO - allennlp.training.tensorboard_writer - loss            |     0.485  |     0.764
+2025-03-24 13:47:45,539 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:47:45,541 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:47:45,541 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:47:45,541 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.560
+2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.092
+2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.571
+2025-03-24 13:47:45,543 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:47:45,543 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:47:45,594 - INFO - allennlp.training.trainer - Epoch duration: 0:01:26.590571
+2025-03-24 13:47:45,595 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:58:30
+2025-03-24 13:47:45,595 - INFO - allennlp.training.trainer - Epoch 18/99
+2025-03-24 13:47:45,595 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:47:46,026 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:47:46,027 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:47:46,042 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_2', 'g_add', 'V_0', 'N_3']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']]]
+selected_programs [[['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_sin', 'N_0', 'g_divide', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'C_2', 'g_minus', 'V_2', 'C_2']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_2']], [['g_tan', 'N_0', 'g_mul', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']]]
+selected_programs [[['g_equal', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_equal', 'N_3']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+2025-03-24 13:49:05,894 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:49:15,788 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:49:15,817 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:49:15,818 - INFO - allennlp.training.tensorboard_writer - loss            |     0.469  |     0.773
+2025-03-24 13:49:15,819 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:49:15,819 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:49:15,820 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:49:15,820 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.550
+2025-03-24 13:49:15,821 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:49:15,821 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:49:15,821 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.122
+2025-03-24 13:49:15,822 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.565
+2025-03-24 13:49:15,823 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:49:15,823 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:49:15,845 - INFO - allennlp.training.trainer - Epoch duration: 0:01:30.250664
+2025-03-24 13:49:15,845 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:57:18
+2025-03-24 13:49:15,846 - INFO - allennlp.training.trainer - Epoch 19/99
+2025-03-24 13:49:15,846 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:49:16,150 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:49:16,151 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:49:16,151 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:49:16,165 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:50:31,543 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:50:39,965 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:50:39,976 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:50:39,978 - INFO - allennlp.training.tensorboard_writer - loss            |     0.445  |     0.776
+2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.537
+2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:50:39,981 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:50:39,981 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.094
+2025-03-24 13:50:39,982 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.568
+2025-03-24 13:50:39,983 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:50:39,983 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Epoch duration: 0:01:24.182009
+2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:55:40
+2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Epoch 20/99
+2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:50:40,339 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:50:40,340 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:50:40,340 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:50:40,340 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:50:40,341 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:50:40,341 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:50:40,341 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:50:40,354 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'C_2']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0']]]
+2025-03-24 13:51:59,973 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:52:09,726 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:52:09,735 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:52:09,736 - INFO - allennlp.training.tensorboard_writer - loss            |     0.441  |     0.764
+2025-03-24 13:52:09,736 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:52:09,737 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:52:09,738 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:52:09,738 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.559
+2025-03-24 13:52:09,739 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:52:09,739 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:52:09,739 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.104
+2025-03-24 13:52:09,740 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.582
+2025-03-24 13:52:09,740 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:52:09,741 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.804281
+2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:54:25
+2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Epoch 21/99
+2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:52:10,264 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:52:10,279 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_2', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'C_2']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1', 'g_double', 'V_2']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 13:53:24,936 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:53:34,903 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:53:34,904 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:53:34,905 - INFO - allennlp.training.tensorboard_writer - loss            |     0.412  |     0.775
+2025-03-24 13:53:34,906 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:53:34,906 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:53:34,907 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:53:34,907 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.602
+2025-03-24 13:53:34,908 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:53:34,908 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:53:34,908 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.102
+2025-03-24 13:53:34,909 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.588
+2025-03-24 13:53:34,909 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:53:34,910 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Epoch duration: 0:01:25.081265
+2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:52:52
+2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Epoch 22/99
+2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:53:35,247 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:53:35,248 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:53:35,248 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:53:35,248 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:53:35,249 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:53:35,249 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:53:35,249 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:53:35,262 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_mul', 'N_2', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_mul', 'V_1', 'N_3']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']]]
+2025-03-24 13:54:53,092 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:55:01,776 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:55:01,787 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:55:01,787 - INFO - allennlp.training.tensorboard_writer - loss            |     0.389  |     0.763
+2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.613
+2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:55:01,790 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.097
+2025-03-24 13:55:01,790 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.599
+2025-03-24 13:55:01,791 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:55:01,791 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Epoch duration: 0:01:26.908723
+2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:51:25
+2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Epoch 23/99
+2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:55:02,158 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:55:02,160 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:55:02,160 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:55:02,172 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:56:17,007 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:56:25,616 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:56:25,618 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:56:25,619 - INFO - allennlp.training.tensorboard_writer - loss            |     0.367  |     0.823
+2025-03-24 13:56:25,620 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:56:25,620 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:56:25,621 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:56:25,621 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.608
+2025-03-24 13:56:25,621 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:56:25,622 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:56:25,622 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 13:56:25,622 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.599
+2025-03-24 13:56:25,623 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:56:25,623 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.825453
+2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:49:49
+2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Epoch 24/99
+2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:56:26,022 - INFO - allennlp.training.trainer - Training
+2025-03-24 13:57:39,815 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:57:48,334 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:57:48,370 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:57:48,371 - INFO - allennlp.training.tensorboard_writer - loss            |     0.359  |     0.803
+2025-03-24 13:57:48,372 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:57:48,372 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:57:48,372 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:57:48,373 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.604
+2025-03-24 13:57:48,373 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:57:48,373 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:57:48,374 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.110
+2025-03-24 13:57:48,374 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.599
+2025-03-24 13:57:48,375 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:57:48,375 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.786983
+2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:48:10
+2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Epoch 25/99
+2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:57:48,819 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:57:48,821 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:57:48,821 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:57:48,834 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'V_0', 'V_1', 'g_double', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'V_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_2', 'g_add', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'N_0', 'V_0', 'gougu_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1', 'g_minus', 'N_0', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']]]
+2025-03-24 13:59:02,415 - INFO - allennlp.training.trainer - Validating
+2025-03-24 13:59:10,932 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 13:59:10,932 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 13:59:10,933 - INFO - allennlp.training.tensorboard_writer - loss            |     0.341  |     0.807
+2025-03-24 13:59:10,934 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 13:59:10,935 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 13:59:10,935 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 13:59:10,935 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.599
+2025-03-24 13:59:10,936 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 13:59:10,936 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 13:59:10,936 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.119
+2025-03-24 13:59:10,937 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.601
+2025-03-24 13:59:10,938 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 13:59:10,938 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.550905
+2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:46:33
+2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Epoch 26/99
+2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 13:59:11,331 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 13:59:11,332 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 13:59:11,332 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 13:59:11,346 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_tan', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_2']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'N_1', 'N_0', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 14:00:24,864 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:00:33,332 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:00:33,333 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:00:33,333 - INFO - allennlp.training.tensorboard_writer - loss            |     0.319  |     0.828
+2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.618
+2025-03-24 14:00:33,336 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:00:33,336 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:00:33,337 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.111
+2025-03-24 14:00:33,337 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.597
+2025-03-24 14:00:33,338 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:00:33,338 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:00:33,352 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.364923
+2025-03-24 14:00:33,352 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:44:55
+2025-03-24 14:00:33,353 - INFO - allennlp.training.trainer - Epoch 27/99
+2025-03-24 14:00:33,353 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:00:33,671 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:00:33,673 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:00:33,673 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:00:33,686 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'N_4', 'g_mul', 'V_1', 'N_2']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']]]
+2025-03-24 14:01:47,939 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:01:56,577 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:01:56,577 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:01:56,578 - INFO - allennlp.training.tensorboard_writer - loss            |     0.304  |     0.851
+2025-03-24 14:01:56,579 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:01:56,579 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.625
+2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:01:56,581 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.125
+2025-03-24 14:01:56,581 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.603
+2025-03-24 14:01:56,582 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:01:56,582 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.268260
+2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:43:21
+2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Epoch 28/99
+2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:01:57,019 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:01:57,020 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:01:57,020 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:01:57,020 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:01:57,021 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:01:57,021 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:01:57,021 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:01:57,034 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:03:11,775 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:03:20,384 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:03:20,403 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:03:20,406 - INFO - allennlp.training.tensorboard_writer - loss            |     0.291  |     0.860
+2025-03-24 14:03:20,406 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:03:20,407 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:03:20,407 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:03:20,407 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.648
+2025-03-24 14:03:20,408 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:03:20,408 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:03:20,409 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.109
+2025-03-24 14:03:20,409 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.602
+2025-03-24 14:03:20,410 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:03:20,410 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.808762
+2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:41:50
+2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Epoch 29/99
+2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:03:20,779 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:03:20,780 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:03:20,780 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:03:20,794 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:04:35,554 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:04:44,097 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:04:44,098 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:04:44,098 - INFO - allennlp.training.tensorboard_writer - loss            |     0.269  |     0.852
+2025-03-24 14:04:44,099 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:04:44,099 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:04:44,100 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:04:44,100 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.636
+2025-03-24 14:04:44,100 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:04:44,101 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:04:44,101 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.114
+2025-03-24 14:04:44,102 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.613
+2025-03-24 14:04:44,102 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:04:44,103 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:04:53,573 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 14:04:57,831 - INFO - allennlp.training.trainer - Epoch duration: 0:01:37.401059
+2025-03-24 14:04:57,832 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:40:50
+2025-03-24 14:04:57,832 - INFO - allennlp.training.trainer - Epoch 30/99
+2025-03-24 14:04:57,832 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:04:58,205 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:04:58,207 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:04:58,207 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:04:58,221 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_3', 'N_2', 'N_4']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_1']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_add', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']]]
+selected_programs [[['g_mul', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_double', 'N_0', 'g_double', 'V_1']], [['g_half', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_2', 'V_1']], [['g_double', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_mul', 'N_0', 'N_2', 'g_divide', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+2025-03-24 14:06:12,243 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:06:20,828 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:06:20,828 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:06:20,829 - INFO - allennlp.training.tensorboard_writer - loss            |     0.254  |     0.898
+2025-03-24 14:06:20,829 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:06:20,829 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.641
+2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:06:20,832 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.104
+2025-03-24 14:06:20,832 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.591
+2025-03-24 14:06:20,832 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:06:20,833 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.026463
+2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:39:16
+2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Epoch 31/99
+2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:06:21,154 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:06:21,166 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_3', 'N_2', 'N_1', 'g_double', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_sin', 'N_3', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_add', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_1', 'N_0', 'gougu_add', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_tan', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_minus', 'N_2', 'N_0', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_0', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_add', 'V_0', 'V_1']], [['g_half', 'N_1', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_divide', 'N_0', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_1', 'gougu_add', 'N_1', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'V_1']]]
+2025-03-24 14:07:35,194 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:07:43,698 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:07:43,699 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:07:43,699 - INFO - allennlp.training.tensorboard_writer - loss            |     0.244  |     0.855
+2025-03-24 14:07:43,700 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:07:43,700 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:07:43,701 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:07:43,701 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.650
+2025-03-24 14:07:43,701 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:07:43,702 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:07:43,702 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.119
+2025-03-24 14:07:43,702 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.604
+2025-03-24 14:07:43,703 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:07:43,703 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.895791
+2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:37:42
+2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Epoch 32/99
+2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:07:44,084 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:07:44,085 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:07:44,085 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:07:44,099 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:09:02,203 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:09:12,076 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:09:12,077 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:09:12,078 - INFO - allennlp.training.tensorboard_writer - loss            |     0.241  |     0.915
+2025-03-24 14:09:12,079 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:09:12,079 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:09:12,080 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:09:12,080 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.621
+2025-03-24 14:09:12,080 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:09:12,081 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:09:12,081 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.119
+2025-03-24 14:09:12,081 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.590
+2025-03-24 14:09:12,082 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:09:12,083 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:09:12,102 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.347464
+2025-03-24 14:09:12,103 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:36:20
+2025-03-24 14:09:12,103 - INFO - allennlp.training.trainer - Epoch 33/99
+2025-03-24 14:09:12,103 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:09:12,438 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:09:12,439 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:09:12,439 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:09:12,454 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_half', 'N_2', 'g_add', 'N_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']]]
+selected_programs [[['g_mul', 'N_2', 'N_3', 'g_mul', 'V_0', 'V_0']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_cos', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0']]]
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+2025-03-24 14:10:32,716 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:10:42,650 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:10:42,651 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:10:42,652 - INFO - allennlp.training.tensorboard_writer - loss            |     0.225  |     0.915
+2025-03-24 14:10:42,653 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:10:42,653 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:10:42,654 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:10:42,654 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.645
+2025-03-24 14:10:42,654 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:10:42,655 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:10:42,655 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 14:10:42,656 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.594
+2025-03-24 14:10:42,656 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:10:42,656 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Epoch duration: 0:01:30.573084
+2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:35:02
+2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Epoch 34/99
+2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:10:42,969 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:10:42,984 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_4', 'N_5', 'N_5']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_add', 'V_0', 'N_2']], [['g_equal', 'N_1']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_mul', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_1', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_mul', 'N_0', 'N_2', 'g_add', 'V_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']]]
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_divide', 'N_0', 'N_1']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_minus', 'N_2', 'N_1', 'g_divide', 'V_0', 'N_2']], [['g_equal', 'N_0', 'g_divide', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_2', 'N_0', 'N_1', 'gougu_add', 'V_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_sin', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_0', 'N_2', 'g_add', 'V_1', 'N_2']], [['g_equal', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_0']]]
+selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_tan', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_2']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 14:12:03,109 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:12:11,752 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:12:11,753 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:12:11,753 - INFO - allennlp.training.tensorboard_writer - loss            |     0.208  |     0.923
+2025-03-24 14:12:11,754 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:12:11,755 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.677
+2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:12:11,757 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.114
+2025-03-24 14:12:11,757 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.592
+2025-03-24 14:12:11,757 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:12:11,758 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.113678
+2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:33:41
+2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Epoch 35/99
+2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:12:12,133 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:12:12,135 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:12:12,135 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:12:12,148 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:13:26,122 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:13:35,886 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:13:35,887 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - loss            |     0.200  |     0.956
+2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.665
+2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.096
+2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.618
+2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:13:35,892 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:13:46,234 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 14:13:51,570 - INFO - allennlp.training.trainer - Epoch duration: 0:01:39.779376
+2025-03-24 14:13:51,571 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:32:38
+2025-03-24 14:13:51,571 - INFO - allennlp.training.trainer - Epoch 36/99
+2025-03-24 14:13:51,571 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:13:51,994 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:13:52,012 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'N_2', 'N_3', 'g_divide', 'V_0', 'N_4']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]]
+selected_programs [[['g_mul', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_half', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'C_2']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_2', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_2', 'gougu_add', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0', 'g_divide', 'V_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_sin', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_double', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]]
+2025-03-24 14:15:12,729 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:15:22,761 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:15:22,762 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:15:22,763 - INFO - allennlp.training.tensorboard_writer - loss            |     0.192  |     0.955
+2025-03-24 14:15:22,764 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:15:22,764 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:15:22,765 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:15:22,765 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.673
+2025-03-24 14:15:22,766 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:15:22,766 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:15:22,767 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.107
+2025-03-24 14:15:22,767 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.622
+2025-03-24 14:15:22,768 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:15:22,769 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:15:31,538 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 14:15:40,479 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.908280
+2025-03-24 14:15:40,480 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:31:49
+2025-03-24 14:15:40,480 - INFO - allennlp.training.trainer - Epoch 37/99
+2025-03-24 14:15:40,481 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:15:40,818 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:15:40,818 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:15:40,832 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_3', 'N_0', 'g_mul', 'N_1', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'N_1', 'N_0', 'gougu_minus', 'V_1', 'V_0', 'gougu_add', 'V_2', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_2', 'N_1', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_mul', 'N_0', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'gougu_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_add', 'N_0', 'N_0']]]
+2025-03-24 14:16:58,182 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:17:08,211 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:17:08,212 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:17:08,213 - INFO - allennlp.training.tensorboard_writer - loss            |     0.178  |     0.999
+2025-03-24 14:17:08,214 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:17:08,214 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:17:08,215 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:17:08,215 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.677
+2025-03-24 14:17:08,215 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:17:08,216 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:17:08,216 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.109
+2025-03-24 14:17:08,217 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.625
+2025-03-24 14:17:08,217 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:17:08,217 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:17:18,200 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 14:17:28,006 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.525251
+2025-03-24 14:17:28,006 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:30:54
+2025-03-24 14:17:28,007 - INFO - allennlp.training.trainer - Epoch 38/99
+2025-03-24 14:17:28,007 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:17:28,397 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:17:28,415 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_mul', 'N_1', 'N_2', 'g_add', 'V_0', 'N_3', 'g_divide', 'V_1', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'N_2', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2', 'g_half', 'V_1', 'gougu_minus', 'N_1', 'V_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_divide', 'N_1', 'N_2', 'g_mul', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_3', 'g_add', 'N_2', 'V_0', 'g_bili', 'V_1', 'N_1', 'N_0']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'N_2', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_minus', 'C_3', 'C_2']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']]]
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['cal_circle_area', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'C_5']], [['cal_circle_area', 'N_0', 'g_divide', 'N_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_3', 'g_add', 'V_0', 'N_3']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_3', 'N_5', 'N_9', 'g_divide', 'N_8', 'N_7', 'g_add', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']]]
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'N_0', 'g_double', 'V_1']]]
+2025-03-24 14:18:45,828 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:18:55,098 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:18:55,098 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:18:55,099 - INFO - allennlp.training.tensorboard_writer - loss            |     0.180  |     0.984
+2025-03-24 14:18:55,099 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:18:55,100 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:18:55,101 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:18:55,101 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.674
+2025-03-24 14:18:55,102 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:18:55,102 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:18:55,102 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 14:18:55,103 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.621
+2025-03-24 14:18:55,104 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:18:55,104 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:19:05,102 - INFO - allennlp.training.trainer - Epoch duration: 0:01:37.095233
+2025-03-24 14:19:05,103 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:29:41
+2025-03-24 14:19:05,103 - INFO - allennlp.training.trainer - Epoch 39/99
+2025-03-24 14:19:05,103 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:19:05,476 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:19:05,493 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['g_divide', 'N_1', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'N_2', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_divide', 'V_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_sin', 'V_1']], [['g_equal', 'N_1']], [['g_double', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]]
+2025-03-24 14:20:23,480 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:20:32,160 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:20:32,161 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:20:32,162 - INFO - allennlp.training.tensorboard_writer - loss            |     0.174  |     1.013
+2025-03-24 14:20:32,162 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:20:32,163 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:20:32,164 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:20:32,164 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.687
+2025-03-24 14:20:32,164 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.121
+2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.591
+2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:20:32,166 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.106212
+2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:28:11
+2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Epoch 40/99
+2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:20:32,541 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:20:32,554 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:21:47,053 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:21:55,838 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:21:55,838 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:21:55,839 - INFO - allennlp.training.tensorboard_writer - loss            |     0.171  |     1.032
+2025-03-24 14:21:55,839 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:21:55,840 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:21:55,840 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:21:55,841 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.682
+2025-03-24 14:21:55,841 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:21:55,842 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:21:55,842 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.116
+2025-03-24 14:21:55,843 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.617
+2025-03-24 14:21:55,843 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:21:55,844 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:22:06,234 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.024312
+2025-03-24 14:22:06,234 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:26:51
+2025-03-24 14:22:06,235 - INFO - allennlp.training.trainer - Epoch 41/99
+2025-03-24 14:22:06,235 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:22:06,618 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:22:06,619 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:22:06,619 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:22:06,619 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:22:06,620 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:22:06,620 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:22:06,620 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:22:06,634 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_2', 'g_minus', 'N_0', 'N_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_2', 'g_divide', 'N_3', 'N_4', 'g_minus', 'V_0', 'V_1']], [['g_minus', 'N_2', 'N_3', 'g_bili', 'N_1', 'N_2', 'V_0']], [['g_sin', 'N_0', 'g_divide', 'N_2', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0', 'g_add', 'N_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_sin', 'V_0', 'g_divide', 'N_0', 'V_1']], [['gougu_minus', 'N_1', 'N_2', 'g_add', 'V_0', 'N_1', 'g_mul', 'V_1', 'N_3']], [['g_minus', 'N_0', 'N_1', 'gougu_add', 'N_0', 'V_0', 'g_divide', 'N_1', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_2', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['cal_circle_area', 'N_1', 'g_divide', 'C_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_half', 'N_0', 'cal_circle_area', 'V_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_1', 'V_2']], [['cal_circle_area', 'N_1', 'g_divide', 'N_0', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_2', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_3', 'g_half', 'V_2']], [['g_mul', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_mul', 'N_1', 'N_3']], [['g_mul', 'N_0', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_2', 'g_add', 'V_0', 'N_0', 'g_half', 'V_1', 'g_mul', 'V_2', 'N_3']]]
+2025-03-24 14:23:20,969 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:23:29,579 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:23:29,579 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - loss            |     0.160  |     1.019
+2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:23:29,581 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.668
+2025-03-24 14:23:29,582 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:23:29,582 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.122
+2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.608
+2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.374207
+2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:25:16
+2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Epoch 42/99
+2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:23:29,917 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:23:29,919 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:23:29,919 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:23:29,932 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_4', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_6']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_cos', 'V_0', 'g_mul', 'C_5', 'V_1']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]]
+selected_programs [[['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_add', 'V_0', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']]]
+2025-03-24 14:24:44,556 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:24:54,677 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:24:54,677 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:24:54,677 - INFO - allennlp.training.tensorboard_writer - loss            |     0.152  |     1.039
+2025-03-24 14:24:54,679 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:24:54,679 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:24:54,679 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:24:54,680 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.684
+2025-03-24 14:24:54,680 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:24:54,680 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:24:54,681 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.109
+2025-03-24 14:24:54,682 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.625
+2025-03-24 14:24:54,682 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:24:54,683 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:25:05,497 - INFO - allennlp.training.trainer - Epoch duration: 0:01:35.888198
+2025-03-24 14:25:05,498 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:23:58
+2025-03-24 14:25:05,498 - INFO - allennlp.training.trainer - Epoch 43/99
+2025-03-24 14:25:05,499 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:25:05,854 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:25:05,869 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]]
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_0', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'N_1', 'g_minus', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'V_0', 'V_1', 'g_double', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']]]
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1', 'g_divide', 'V_2', 'N_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'N_3', 'g_divide', 'V_1', 'V_2']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_add', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'V_0', 'N_1']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_mul', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_0', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_double', 'N_0']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0']]]
+selected_programs [[['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0', 'g_minus', 'V_1', 'N_2', 'g_minus', 'V_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+2025-03-24 14:26:20,753 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:26:29,499 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:26:29,500 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:26:29,500 - INFO - allennlp.training.tensorboard_writer - loss            |     0.141  |     1.065
+2025-03-24 14:26:29,501 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:26:29,502 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:26:29,502 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:26:29,503 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.697
+2025-03-24 14:26:29,503 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:26:29,503 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:26:29,504 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.126
+2025-03-24 14:26:29,505 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.608
+2025-03-24 14:26:29,505 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:26:29,506 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Epoch duration: 0:01:24.039338
+2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:22:24
+2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Epoch 44/99
+2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:26:29,833 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:26:29,834 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:26:29,834 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:26:29,834 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:26:29,835 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:26:29,835 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:26:29,835 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:26:29,855 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_2', 'g_equal', 'N_1']], [['g_cos', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]]
+selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_2', 'g_divide', 'N_1', 'V_0', 'g_add', 'N_1', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_4', 'N_3', 'N_5', 'g_add', 'V_0', 'N_5', 'g_minus', 'N_5', 'V_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_2', 'V_0']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_mul', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0', 'g_half', 'N_1', 'g_minus', 'V_2', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_2']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_sin', 'N_2', 'g_divide', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_1', 'g_add', 'V_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']]]
+2025-03-24 14:27:44,248 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:27:52,963 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:27:52,963 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:27:52,963 - INFO - allennlp.training.tensorboard_writer - loss            |     0.120  |     1.062
+2025-03-24 14:27:52,964 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:27:52,965 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:27:52,965 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:27:52,965 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.705
+2025-03-24 14:27:52,966 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:27:52,966 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:27:52,967 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 14:27:52,967 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.625
+2025-03-24 14:27:52,968 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:27:52,968 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:28:03,377 - INFO - allennlp.training.trainer - Epoch duration: 0:01:33.839376
+2025-03-24 14:28:03,378 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:21:02
+2025-03-24 14:28:03,378 - INFO - allennlp.training.trainer - Epoch 45/99
+2025-03-24 14:28:03,379 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:28:03,741 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:28:03,742 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:28:03,757 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_mul', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_mul', 'N_1', 'N_2', 'g_add', 'V_0', 'N_3', 'g_divide', 'V_1', 'N_1']], [['g_add', 'N_0', 'N_2', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_2', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_minus', 'N_0', 'N_1', 'g_add', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_2', 'g_minus', 'N_1', 'N_2', 'g_bili', 'N_3', 'V_0', 'V_1', 'g_minus', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_divide', 'N_1', 'N_2', 'g_mul', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_3', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_3']], [['g_minus', 'C_3', 'N_0']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_2', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']]]
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_0']], [['g_equal', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'N_2', 'N_0', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_double', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]]
+2025-03-24 14:29:18,720 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:29:27,412 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:29:27,413 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:29:27,414 - INFO - allennlp.training.tensorboard_writer - loss            |     0.096  |     1.112
+2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.705
+2025-03-24 14:29:27,416 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:29:27,416 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:29:27,417 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 14:29:27,417 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.613
+2025-03-24 14:29:27,417 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:29:27,418 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:29:37,548 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.169543
+2025-03-24 14:29:37,549 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:19:41
+2025-03-24 14:29:37,549 - INFO - allennlp.training.trainer - Epoch 46/99
+2025-03-24 14:29:37,550 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:29:37,898 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:29:37,914 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'C_2']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_1', 'g_minus', 'N_1', 'V_1']], [['gougu_add', 'N_0', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_0', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_2', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_0', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_cos', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']]]
+selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_2', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_4', 'N_5', 'g_bili', 'N_3', 'N_5', 'V_0']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_2', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_mul', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']]]
+selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_equal', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_half', 'V_2']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_double', 'N_2']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_add', 'V_2', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_2', 'N_0', 'g_add', 'V_0', 'N_1', 'g_add', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'C_5']], [['g_add', 'N_0', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_2', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]]
+2025-03-24 14:30:52,488 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:31:01,187 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:31:01,188 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:31:01,188 - INFO - allennlp.training.tensorboard_writer - loss            |     0.087  |     1.121
+2025-03-24 14:31:01,189 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:31:01,190 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:31:01,190 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.698
+2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 14:31:01,192 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.642
+2025-03-24 14:31:01,193 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:31:01,193 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:31:11,411 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 14:31:16,545 - INFO - allennlp.training.trainer - Epoch duration: 0:01:38.996021
+2025-03-24 14:31:16,546 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:18:24
+2025-03-24 14:31:16,546 - INFO - allennlp.training.trainer - Epoch 47/99
+2025-03-24 14:31:16,546 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:31:16,906 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:31:16,921 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:32:31,775 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:32:40,440 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:32:40,441 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:32:40,441 - INFO - allennlp.training.tensorboard_writer - loss            |     0.082  |     1.134
+2025-03-24 14:32:40,443 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:32:40,443 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:32:40,444 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:32:40,444 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.693
+2025-03-24 14:32:40,444 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.104
+2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.635
+2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:32:40,446 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:32:52,612 - INFO - allennlp.training.trainer - Epoch duration: 0:01:36.065327
+2025-03-24 14:32:52,612 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:17:03
+2025-03-24 14:32:52,612 - INFO - allennlp.training.trainer - Epoch 48/99
+2025-03-24 14:32:52,613 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:32:52,982 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:32:52,997 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_bili', 'N_2', 'N_3', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['g_double', 'N_0']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['gougu_minus', 'N_2', 'N_1', 'g_bili', 'V_0', 'N_1', 'N_3']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_tan', 'V_0']], [['g_divide', 'N_1', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_3', 'V_0', 'g_divide', 'V_1', 'N_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_sin', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0']]]
+2025-03-24 14:34:07,627 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:34:16,323 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:34:16,324 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:34:16,325 - INFO - allennlp.training.tensorboard_writer - loss            |     0.078  |     1.159
+2025-03-24 14:34:16,326 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:34:16,326 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:34:16,327 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:34:16,327 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.707
+2025-03-24 14:34:16,328 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:34:16,328 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:34:16,328 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.123
+2025-03-24 14:34:16,329 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.631
+2025-03-24 14:34:16,329 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:34:16,330 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:34:26,656 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.043945
+2025-03-24 14:34:26,657 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:15:39
+2025-03-24 14:34:26,657 - INFO - allennlp.training.trainer - Epoch 49/99
+2025-03-24 14:34:26,657 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:34:27,059 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:34:27,060 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:34:27,075 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_equal', 'N_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'C_2', 'g_half', 'N_0', 'g_half', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_half', 'V_0', 'g_minus', 'V_1', 'V_2']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2', 'gougu_add', 'V_0', 'N_0']]]
+2025-03-24 14:35:48,312 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:35:57,120 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:35:57,120 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:35:57,121 - INFO - allennlp.training.tensorboard_writer - loss            |     0.081  |     1.165
+2025-03-24 14:35:57,122 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:35:57,122 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:35:57,123 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:35:57,123 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.696
+2025-03-24 14:35:57,123 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:35:57,124 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:35:57,124 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.113
+2025-03-24 14:35:57,124 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.633
+2025-03-24 14:35:57,125 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:35:57,125 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:36:06,907 - INFO - allennlp.training.trainer - Epoch duration: 0:01:40.249765
+2025-03-24 14:36:06,908 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:14:22
+2025-03-24 14:36:06,908 - INFO - allennlp.training.trainer - Epoch 50/99
+2025-03-24 14:36:06,908 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:36:07,239 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:36:07,240 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:36:07,254 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:37:25,430 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:37:35,565 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:37:35,566 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:37:35,567 - INFO - allennlp.training.tensorboard_writer - loss            |     0.074  |     1.180
+2025-03-24 14:37:35,568 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:37:35,568 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:37:35,569 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:37:35,569 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.704
+2025-03-24 14:37:35,569 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:37:35,570 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:37:35,570 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.111
+2025-03-24 14:37:35,571 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.623
+2025-03-24 14:37:35,571 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:37:35,571 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:37:45,879 - INFO - allennlp.training.trainer - Epoch duration: 0:01:38.970530
+2025-03-24 14:37:45,879 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:13:02
+2025-03-24 14:37:45,879 - INFO - allennlp.training.trainer - Epoch 51/99
+2025-03-24 14:37:45,880 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:37:46,287 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:37:46,287 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:37:46,301 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:39:06,059 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:39:14,788 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:39:14,789 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:39:14,789 - INFO - allennlp.training.tensorboard_writer - loss            |     0.068  |     1.190
+2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.700
+2025-03-24 14:39:14,792 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:39:14,793 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:39:14,794 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 14:39:14,795 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.634
+2025-03-24 14:39:14,795 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:39:14,796 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:39:25,681 - INFO - allennlp.training.trainer - Epoch duration: 0:01:39.801897
+2025-03-24 14:39:25,682 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:11:42
+2025-03-24 14:39:25,682 - INFO - allennlp.training.trainer - Epoch 52/99
+2025-03-24 14:39:25,683 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:39:26,036 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:39:26,049 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:40:40,144 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:40:48,831 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:40:48,832 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:40:48,833 - INFO - allennlp.training.tensorboard_writer - loss            |     0.066  |     1.202
+2025-03-24 14:40:48,833 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:40:48,834 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:40:48,834 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:40:48,835 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.693
+2025-03-24 14:40:48,835 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:40:48,835 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:40:48,836 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.121
+2025-03-24 14:40:48,836 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.606
+2025-03-24 14:40:48,837 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:40:48,837 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:40:48,908 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.226120
+2025-03-24 14:40:48,909 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:10:07
+2025-03-24 14:40:48,909 - INFO - allennlp.training.trainer - Epoch 53/99
+2025-03-24 14:40:48,909 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:40:49,214 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:40:49,215 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:40:49,229 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'N_0', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'gougu_minus', 'N_2', 'V_1', 'g_divide', 'V_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+selected_programs [[['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1', 'g_minus', 'N_0', 'V_2']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']]]
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0', 'g_minus', 'V_2', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1', 'gougu_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+selected_programs [[['g_double', 'N_0', 'g_sin', 'V_0', 'g_divide', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_add', 'V_1', 'N_0', 'g_minus', 'V_2', 'V_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]]
+2025-03-24 14:42:04,098 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:42:12,867 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:42:12,868 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:42:12,869 - INFO - allennlp.training.tensorboard_writer - loss            |     0.056  |     1.197
+2025-03-24 14:42:12,870 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:42:12,870 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:42:12,871 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:42:12,871 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.713
+2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.123
+2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.625
+2025-03-24 14:42:12,873 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:42:12,874 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:42:24,080 - INFO - allennlp.training.trainer - Epoch duration: 0:01:35.171115
+2025-03-24 14:42:24,081 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:08:42
+2025-03-24 14:42:24,081 - INFO - allennlp.training.trainer - Epoch 54/99
+2025-03-24 14:42:24,081 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:42:24,435 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:42:24,436 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:42:24,436 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:42:24,436 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:42:24,437 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:42:24,437 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:42:24,437 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:42:24,449 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_equal', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_half', 'V_2']], [['gougu_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['gougu_minus', 'N_2', 'N_1', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_add', 'N_0', 'N_0', 'g_divide', 'V_1', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_2', 'g_half', 'V_1', 'gougu_minus', 'V_2', 'N_3']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_minus', 'N_0', 'V_0', 'gougu_add', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]]
+2025-03-24 14:43:38,822 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:43:47,613 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:43:47,614 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:43:47,615 - INFO - allennlp.training.tensorboard_writer - loss            |     0.049  |     1.226
+2025-03-24 14:43:47,616 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:43:47,616 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:43:47,616 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:43:47,617 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.707
+2025-03-24 14:43:47,617 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:43:47,618 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:43:47,618 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 14:43:47,618 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.627
+2025-03-24 14:43:47,619 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:43:47,619 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:43:58,586 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.505436
+2025-03-24 14:43:58,587 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:07:16
+2025-03-24 14:43:58,587 - INFO - allennlp.training.trainer - Epoch 55/99
+2025-03-24 14:43:58,587 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:43:58,979 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:43:58,980 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:43:58,980 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:43:58,980 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:43:58,981 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:43:58,981 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:43:58,981 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:43:58,995 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_sin', 'N_0', 'g_divide', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_half', 'V_2']], [['g_divide', 'N_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_minus', 'N_2', 'N_1', 'g_add', 'V_0', 'N_0', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'N_0', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_add', 'N_0', 'N_0', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_half', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]]
+selected_programs [[['g_double', 'N_3', 'gougu_minus', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_3']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_1', 'g_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']]]
+2025-03-24 14:45:13,195 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:45:21,997 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:45:21,997 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:45:21,998 - INFO - allennlp.training.tensorboard_writer - loss            |     0.048  |     1.226
+2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.709
+2025-03-24 14:45:22,000 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.620
+2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:45:22,002 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:45:32,521 - INFO - allennlp.training.trainer - Epoch duration: 0:01:33.933025
+2025-03-24 14:45:32,522 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:05:50
+2025-03-24 14:45:32,523 - INFO - allennlp.training.trainer - Epoch 56/99
+2025-03-24 14:45:32,523 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:45:32,875 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:45:32,877 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:45:32,890 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'gougu_add', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 14:46:46,753 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:46:55,489 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:46:55,490 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:46:55,491 - INFO - allennlp.training.tensorboard_writer - loss            |     0.046  |     1.248
+2025-03-24 14:46:55,491 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:46:55,492 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.708
+2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.109
+2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.635
+2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:47:18,596 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.073355
+2025-03-24 14:47:18,597 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:04:32
+2025-03-24 14:47:18,597 - INFO - allennlp.training.trainer - Epoch 57/99
+2025-03-24 14:47:18,597 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:47:18,961 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:47:18,963 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:47:18,976 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:48:35,195 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:48:43,914 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:48:43,915 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:48:43,915 - INFO - allennlp.training.tensorboard_writer - loss            |     0.043  |     1.246
+2025-03-24 14:48:43,916 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:48:43,917 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:48:43,917 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:48:43,917 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.714
+2025-03-24 14:48:43,918 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:48:43,918 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:48:43,918 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.124
+2025-03-24 14:48:43,919 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.621
+2025-03-24 14:48:43,919 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:48:43,920 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:49:08,186 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.588647
+2025-03-24 14:49:08,187 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:03:16
+2025-03-24 14:49:08,187 - INFO - allennlp.training.trainer - Epoch 58/99
+2025-03-24 14:49:08,187 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:49:08,587 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:49:08,589 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:49:08,604 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:50:24,124 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:50:32,978 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:50:32,979 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - loss            |     0.043  |     1.273
+2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.723
+2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:50:32,982 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:50:32,982 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.126
+2025-03-24 14:50:32,983 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.626
+2025-03-24 14:50:32,983 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:50:32,983 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:50:56,970 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.783204
+2025-03-24 14:50:56,971 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:01:59
+2025-03-24 14:50:56,971 - INFO - allennlp.training.trainer - Epoch 59/99
+2025-03-24 14:50:56,971 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:50:57,318 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:50:57,319 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:50:57,319 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:50:57,319 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:50:57,320 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:50:57,320 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:50:57,320 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:50:57,334 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_minus', 'N_0', 'V_0', 'gougu_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]]
+2025-03-24 14:52:14,797 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:52:23,751 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:52:23,751 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - loss            |     0.038  |     1.264
+2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.126
+2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.627
+2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:52:23,755 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:52:48,227 - INFO - allennlp.training.trainer - Epoch duration: 0:01:51.256274
+2025-03-24 14:52:48,228 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:00:42
+2025-03-24 14:52:48,228 - INFO - allennlp.training.trainer - Epoch 60/99
+2025-03-24 14:52:48,229 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:52:48,579 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:52:48,580 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:52:48,580 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660
+2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:52:48,595 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_add', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']]]
+selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]]
+2025-03-24 14:54:04,648 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:54:13,595 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:54:13,595 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:54:13,597 - INFO - allennlp.training.tensorboard_writer - loss            |     0.036  |     1.268
+2025-03-24 14:54:13,598 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:54:13,598 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17660.000  |       N/A
+2025-03-24 14:54:13,598 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:54:13,599 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.728
+2025-03-24 14:54:13,600 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:54:13,600 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:54:13,601 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.123
+2025-03-24 14:54:13,601 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.635
+2025-03-24 14:54:13,602 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:54:13,603 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:54:38,334 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.105851
+2025-03-24 14:54:38,335 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:59:23
+2025-03-24 14:54:38,335 - INFO - allennlp.training.trainer - Epoch 61/99
+2025-03-24 14:54:38,335 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:54:38,691 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:54:38,692 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:54:38,692 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:54:38,706 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_2', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'C_2']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']]]
+2025-03-24 14:55:54,064 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:56:02,928 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:56:02,929 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:56:02,930 - INFO - allennlp.training.tensorboard_writer - loss            |     0.035  |     1.285
+2025-03-24 14:56:02,930 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:56:02,930 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.720
+2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 14:56:02,933 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.640
+2025-03-24 14:56:02,933 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:56:02,933 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:56:27,127 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.792393
+2025-03-24 14:56:27,128 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:58:02
+2025-03-24 14:56:27,128 - INFO - allennlp.training.trainer - Epoch 62/99
+2025-03-24 14:56:27,128 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:56:27,474 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:56:27,476 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:56:27,488 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0', 'cal_circle_area', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'C_5']], [['cal_circle_area', 'N_0', 'g_divide', 'N_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_3']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5', 'g_minus', 'V_0', 'N_5', 'g_minus', 'N_5', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_minus', 'V_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_minus', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+2025-03-24 14:57:42,461 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:57:51,358 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:57:51,358 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - loss            |     0.034  |     1.277
+2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:57:51,360 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:57:51,360 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.122
+2025-03-24 14:57:51,360 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.617
+2025-03-24 14:57:51,361 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:57:51,363 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.222078
+2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:56:41
+2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Epoch 63/99
+2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 14:58:16,692 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 14:58:16,692 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 14:58:16,704 - INFO - allennlp.training.trainer - Training
+2025-03-24 14:59:32,239 - INFO - allennlp.training.trainer - Validating
+2025-03-24 14:59:41,114 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 14:59:41,114 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 14:59:41,114 - INFO - allennlp.training.tensorboard_writer - loss            |     0.032  |     1.291
+2025-03-24 14:59:41,115 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 14:59:41,115 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 14:59:41,116 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 14:59:41,116 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.715
+2025-03-24 14:59:41,117 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 14:59:41,118 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 14:59:41,118 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.126
+2025-03-24 14:59:41,118 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.633
+2025-03-24 14:59:41,119 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 14:59:41,120 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:00:06,036 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.685559
+2025-03-24 15:00:06,037 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:55:19
+2025-03-24 15:00:06,037 - INFO - allennlp.training.trainer - Epoch 64/99
+2025-03-24 15:00:06,037 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:00:06,443 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:00:06,444 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:00:06,444 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:00:06,460 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']]]
+2025-03-24 15:01:22,311 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:01:31,000 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:01:31,002 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:01:31,002 - INFO - allennlp.training.tensorboard_writer - loss            |     0.032  |     1.293
+2025-03-24 15:01:31,002 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:01:31,004 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:01:31,005 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:01:31,006 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.706
+2025-03-24 15:01:31,006 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.104
+2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.645
+2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:01:31,008 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:01:46,580 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 15:02:10,923 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 15:02:19,504 - INFO - allennlp.training.trainer - Epoch duration: 0:02:13.467008
+2025-03-24 15:02:19,505 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:54:09
+2025-03-24 15:02:19,505 - INFO - allennlp.training.trainer - Epoch 65/99
+2025-03-24 15:02:19,505 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:02:19,927 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:02:19,928 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:02:19,928 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:02:19,947 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'N_1', 'g_add', 'V_0', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'N_0', 'C_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'C_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]]
+selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0', 'g_add', 'V_0', 'V_1']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]]
+2025-03-24 15:03:36,233 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:03:45,125 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:03:45,125 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - loss            |     0.030  |     1.304
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.723
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:03:45,127 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.119
+2025-03-24 15:03:45,128 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.650
+2025-03-24 15:03:45,128 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:03:45,130 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:04:00,855 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 15:04:23,785 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Epoch duration: 0:02:12.934145
+2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:52:57
+2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Epoch 66/99
+2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:04:32,791 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:04:32,792 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:04:32,808 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]]
+2025-03-24 15:05:47,755 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:05:57,265 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - loss            |     0.029  |     1.318
+2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.713
+2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.122
+2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.636
+2025-03-24 15:05:57,268 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:05:57,270 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:06:22,308 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.868413
+2025-03-24 15:06:22,309 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:51:31
+2025-03-24 15:06:22,309 - INFO - allennlp.training.trainer - Epoch 67/99
+2025-03-24 15:06:22,309 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:06:22,675 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:06:22,676 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:06:22,676 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:06:22,676 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:06:22,677 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:06:22,677 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:06:22,677 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:06:22,692 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:07:36,657 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:07:45,422 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:07:45,423 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:07:45,424 - INFO - allennlp.training.tensorboard_writer - loss            |     0.029  |     1.317
+2025-03-24 15:07:45,425 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:07:45,426 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:07:45,426 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:07:45,426 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.722
+2025-03-24 15:07:45,427 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:07:45,427 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:07:45,428 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 15:07:45,428 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.634
+2025-03-24 15:07:45,429 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:07:45,429 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:08:22,297 - INFO - allennlp.training.trainer - Epoch duration: 0:01:59.988171
+2025-03-24 15:08:22,298 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:50:10
+2025-03-24 15:08:22,298 - INFO - allennlp.training.trainer - Epoch 68/99
+2025-03-24 15:08:22,299 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:08:22,619 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:08:22,620 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:08:22,620 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:08:22,620 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:08:22,621 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:08:22,621 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:08:22,621 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:08:22,634 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_3', 'N_1', 'N_2', 'gougu_add', 'V_0', 'N_0']], [['g_sin', 'C_0', 'g_mul', 'V_0', 'N_0', 'g_add', 'V_1', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_add', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_1', 'N_0', 'gougu_add', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_tan', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_add', 'N_0', 'V_0', 'g_bili', 'N_2', 'N_0', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_add', 'V_0', 'V_1']], [['g_minus', 'C_2', 'N_1', 'g_minus', 'N_2', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_divide', 'N_0', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_2', 'g_half', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_double', 'N_1', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_tan', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_add', 'V_1', 'V_0']]]
+2025-03-24 15:09:42,401 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:09:52,540 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:09:52,541 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:09:52,542 - INFO - allennlp.training.tensorboard_writer - loss            |     0.029  |     1.336
+2025-03-24 15:09:52,543 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:09:52,543 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:09:52,544 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:09:52,544 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.716
+2025-03-24 15:09:52,544 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:09:52,545 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:09:52,545 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.123
+2025-03-24 15:09:52,546 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.631
+2025-03-24 15:09:52,546 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:09:52,547 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:10:17,217 - INFO - allennlp.training.trainer - Epoch duration: 0:01:54.918312
+2025-03-24 15:10:17,218 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:48:45
+2025-03-24 15:10:17,218 - INFO - allennlp.training.trainer - Epoch 69/99
+2025-03-24 15:10:17,218 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:10:17,591 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:10:17,593 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:10:17,606 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:11:32,868 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:11:41,598 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:11:41,598 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:11:41,599 - INFO - allennlp.training.tensorboard_writer - loss            |     0.028  |     1.330
+2025-03-24 15:11:41,599 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:11:41,599 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:11:41,600 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:11:41,600 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.723
+2025-03-24 15:11:41,601 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:11:41,601 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:11:41,602 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.114
+2025-03-24 15:11:41,602 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.640
+2025-03-24 15:11:41,602 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:11:41,603 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:12:08,089 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.870937
+2025-03-24 15:12:08,090 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:47:18
+2025-03-24 15:12:08,090 - INFO - allennlp.training.trainer - Epoch 70/99
+2025-03-24 15:12:08,090 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:12:08,509 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:12:08,510 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:12:08,510 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:12:08,524 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_circle_area', 'N_0']], [['g_double', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'g_double', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1']], [['g_bili', 'C_5', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'V_0', 'N_1']], [['g_cos', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_half', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'cal_circle_area', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_half', 'N_0', 'cal_cone', 'V_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0', 'g_minus', 'N_2', 'V_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_2', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_divide', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0', 'cal_circle_area', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'C_5']], [['cal_circle_area', 'N_0', 'g_divide', 'N_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_3', 'N_2', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_5', 'N_5', 'g_add', 'V_0', 'N_5']], [['g_bili', 'N_1', 'N_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_minus', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+2025-03-24 15:13:22,930 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:13:31,619 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:13:31,620 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:13:31,620 - INFO - allennlp.training.tensorboard_writer - loss            |     0.026  |     1.332
+2025-03-24 15:13:31,621 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:13:31,621 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.717
+2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.109
+2025-03-24 15:13:31,623 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.644
+2025-03-24 15:13:31,623 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:13:31,624 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:13:57,735 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.645165
+2025-03-24 15:13:57,736 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:45:50
+2025-03-24 15:13:57,736 - INFO - allennlp.training.trainer - Epoch 71/99
+2025-03-24 15:13:57,736 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:13:58,190 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:13:58,192 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:13:58,204 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:15:12,046 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:15:20,780 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:15:20,780 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:15:20,781 - INFO - allennlp.training.tensorboard_writer - loss            |     0.026  |     1.344
+2025-03-24 15:15:20,781 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:15:20,782 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:15:20,782 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.712
+2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 15:15:20,784 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.642
+2025-03-24 15:15:20,784 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:15:20,785 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:15:44,796 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.059443
+2025-03-24 15:15:44,796 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:44:20
+2025-03-24 15:15:44,797 - INFO - allennlp.training.trainer - Epoch 72/99
+2025-03-24 15:15:44,797 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:15:45,190 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:15:45,204 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_equal', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'N_2']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_double', 'N_2', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_half', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]]
+2025-03-24 15:16:59,171 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:17:07,922 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:17:07,923 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:17:07,923 - INFO - allennlp.training.tensorboard_writer - loss            |     0.024  |     1.354
+2025-03-24 15:17:07,924 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:17:07,925 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:17:07,925 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:17:07,925 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 15:17:07,926 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:17:07,926 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:17:07,927 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.121
+2025-03-24 15:17:07,927 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.638
+2025-03-24 15:17:07,928 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:17:07,928 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:17:32,029 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.231922
+2025-03-24 15:17:32,030 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:42:49
+2025-03-24 15:17:32,031 - INFO - allennlp.training.trainer - Epoch 73/99
+2025-03-24 15:17:32,031 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:17:32,393 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:17:32,410 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:18:51,729 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:19:01,899 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:19:01,900 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:19:01,901 - INFO - allennlp.training.tensorboard_writer - loss            |     0.023  |     1.360
+2025-03-24 15:19:01,901 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:19:01,902 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:19:01,903 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:19:01,903 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.726
+2025-03-24 15:19:01,903 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:19:01,904 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:19:01,904 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 15:19:01,905 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.650
+2025-03-24 15:19:01,905 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:19:01,905 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:19:27,258 - INFO - allennlp.training.trainer - Epoch duration: 0:01:55.226941
+2025-03-24 15:19:27,259 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:41:21
+2025-03-24 15:19:27,259 - INFO - allennlp.training.trainer - Epoch 74/99
+2025-03-24 15:19:27,259 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:19:27,601 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:19:27,616 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:20:45,470 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:20:54,150 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:20:54,150 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:20:54,150 - INFO - allennlp.training.tensorboard_writer - loss            |     0.023  |     1.359
+2025-03-24 15:20:54,151 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:20:54,151 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:20:54,152 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:20:54,153 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 15:20:54,153 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:20:54,153 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:20:54,154 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 15:20:54,154 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.644
+2025-03-24 15:20:54,155 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:20:54,156 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:21:18,356 - INFO - allennlp.training.trainer - Epoch duration: 0:01:51.096131
+2025-03-24 15:21:18,357 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:39:51
+2025-03-24 15:21:18,357 - INFO - allennlp.training.trainer - Epoch 75/99
+2025-03-24 15:21:18,358 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:21:18,729 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:21:18,730 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:21:18,747 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_mul', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'C_2']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'V_0', 'V_1', 'g_double', 'V_2']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0', 'g_add', 'V_1', 'N_0', 'g_add', 'N_0', 'V_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'N_0', 'g_add', 'V_0', 'C_2']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_double', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']]]
+selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_1', 'N_0', 'g_minus', 'V_2', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]]
+2025-03-24 15:22:33,013 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:22:41,723 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:22:41,724 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:22:41,724 - INFO - allennlp.training.tensorboard_writer - loss            |     0.023  |     1.360
+2025-03-24 15:22:41,726 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:22:41,726 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.130
+2025-03-24 15:22:41,728 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.633
+2025-03-24 15:22:41,728 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:22:41,729 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:23:06,366 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.008678
+2025-03-24 15:23:06,367 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:38:19
+2025-03-24 15:23:06,367 - INFO - allennlp.training.trainer - Epoch 76/99
+2025-03-24 15:23:06,367 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:23:06,767 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:23:06,768 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:23:06,768 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:23:06,782 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:24:21,135 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:24:29,850 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:24:29,850 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:24:29,851 - INFO - allennlp.training.tensorboard_writer - loss            |     0.022  |     1.364
+2025-03-24 15:24:29,852 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:24:29,853 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:24:29,853 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.120
+2025-03-24 15:24:29,855 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.650
+2025-03-24 15:24:29,855 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:24:29,856 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.252715
+2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:36:47
+2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Epoch 77/99
+2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:24:53,982 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:24:53,983 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:24:53,997 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:26:09,366 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:26:19,489 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:26:19,490 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:26:19,491 - INFO - allennlp.training.tensorboard_writer - loss            |     0.020  |     1.365
+2025-03-24 15:26:19,492 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:26:19,492 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:26:19,493 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:26:19,493 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.721
+2025-03-24 15:26:19,493 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:26:19,494 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:26:19,494 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.120
+2025-03-24 15:26:19,494 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.648
+2025-03-24 15:26:19,495 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:26:19,495 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:26:43,275 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.654991
+2025-03-24 15:26:43,276 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:35:14
+2025-03-24 15:26:43,276 - INFO - allennlp.training.trainer - Epoch 78/99
+2025-03-24 15:26:43,276 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:26:43,663 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:26:43,665 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:26:43,677 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_0', 'g_add', 'N_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_tan', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_1', 'g_minus', 'V_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]]
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_sin', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_minus', 'N_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_bili', 'V_1', 'N_2', 'N_1', 'gougu_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]]
+2025-03-24 15:28:00,017 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:28:08,738 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:28:08,739 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:28:08,740 - INFO - allennlp.training.tensorboard_writer - loss            |     0.020  |     1.382
+2025-03-24 15:28:08,741 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:28:08,741 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:28:08,742 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:28:08,742 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 15:28:08,743 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:28:08,743 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:28:08,743 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.120
+2025-03-24 15:28:08,744 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.639
+2025-03-24 15:28:08,744 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:28:08,745 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:28:32,177 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.900667
+2025-03-24 15:28:32,177 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:33:42
+2025-03-24 15:28:32,177 - INFO - allennlp.training.trainer - Epoch 79/99
+2025-03-24 15:28:32,178 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:28:32,550 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:28:32,568 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'gougu_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]]
+selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_1', 'g_add', 'N_2', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'C_3', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0']]]
+2025-03-24 15:29:47,668 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:29:56,456 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - loss            |     0.021  |     1.376
+2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:29:56,458 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:29:56,458 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:29:56,459 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:29:56,459 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:29:56,460 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.113
+2025-03-24 15:29:56,460 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.650
+2025-03-24 15:29:56,461 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:29:56,461 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:30:22,319 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.141526
+2025-03-24 15:30:22,320 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:32:09
+2025-03-24 15:30:22,320 - INFO - allennlp.training.trainer - Epoch 80/99
+2025-03-24 15:30:22,320 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:30:22,721 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:30:22,722 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:30:22,737 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:31:42,405 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:31:52,540 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:31:52,540 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:31:52,541 - INFO - allennlp.training.tensorboard_writer - loss            |     0.020  |     1.382
+2025-03-24 15:31:52,541 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:31:52,542 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:31:52,543 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:31:52,543 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.725
+2025-03-24 15:31:52,543 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:31:52,544 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:31:52,544 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.111
+2025-03-24 15:31:52,545 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.651
+2025-03-24 15:31:52,545 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:31:52,546 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:32:03,999 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 15:32:30,588 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'.
+2025-03-24 15:32:35,555 - INFO - allennlp.training.trainer - Epoch duration: 0:02:13.235409
+2025-03-24 15:32:35,556 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:30:41
+2025-03-24 15:32:35,556 - INFO - allennlp.training.trainer - Epoch 81/99
+2025-03-24 15:32:35,557 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:32:35,928 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:32:35,946 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:33:50,127 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:33:58,842 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:33:58,842 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:33:58,843 - INFO - allennlp.training.tensorboard_writer - loss            |     0.018  |     1.383
+2025-03-24 15:33:58,844 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:33:58,844 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:33:58,845 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:33:58,845 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.724
+2025-03-24 15:33:58,846 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:33:58,846 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:33:58,847 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.116
+2025-03-24 15:33:58,847 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.644
+2025-03-24 15:33:58,847 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:33:58,848 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:34:22,366 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.809914
+2025-03-24 15:34:22,367 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:29:06
+2025-03-24 15:34:22,367 - INFO - allennlp.training.trainer - Epoch 82/99
+2025-03-24 15:34:22,367 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:34:22,734 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:34:22,749 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_minus', 'V_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]]
+2025-03-24 15:35:36,978 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:35:45,771 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:35:45,772 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:35:45,772 - INFO - allennlp.training.tensorboard_writer - loss            |     0.019  |     1.380
+2025-03-24 15:35:45,773 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:35:45,773 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:35:45,774 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:35:45,774 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:35:45,775 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:35:45,775 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:35:45,775 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.124
+2025-03-24 15:35:45,776 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.650
+2025-03-24 15:35:45,776 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:35:45,777 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:36:09,614 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.246433
+2025-03-24 15:36:09,614 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:27:31
+2025-03-24 15:36:09,614 - INFO - allennlp.training.trainer - Epoch 83/99
+2025-03-24 15:36:09,615 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:36:09,994 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:36:09,996 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:36:10,009 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_4', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_cos', 'V_0', 'g_mul', 'C_5', 'N_0']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]]
+selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_minus', 'N_1', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']]]
+2025-03-24 15:37:28,705 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:37:38,898 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:37:38,898 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:37:38,898 - INFO - allennlp.training.tensorboard_writer - loss            |     0.018  |     1.388
+2025-03-24 15:37:38,899 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:37:38,900 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:37:38,900 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:37:38,901 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.733
+2025-03-24 15:37:38,901 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:37:38,901 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:37:38,902 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.113
+2025-03-24 15:37:38,902 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.651
+2025-03-24 15:37:38,903 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:37:38,903 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:38:03,025 - INFO - allennlp.training.trainer - Epoch duration: 0:01:53.410961
+2025-03-24 15:38:03,026 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:25:57
+2025-03-24 15:38:03,026 - INFO - allennlp.training.trainer - Epoch 84/99
+2025-03-24 15:38:03,027 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:38:03,466 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:38:03,491 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'V_0', 'V_2']], [['gougu_add', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_half', 'V_0', 'g_minus', 'V_2', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2', 'gougu_add', 'V_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2', 'gougu_minus', 'N_0', 'V_0']]]
+2025-03-24 15:39:18,135 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:39:27,917 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:39:27,918 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:39:27,919 - INFO - allennlp.training.tensorboard_writer - loss            |     0.018  |     1.389
+2025-03-24 15:39:27,920 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:39:27,920 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.726
+2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.112
+2025-03-24 15:39:27,922 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.640
+2025-03-24 15:39:27,923 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:39:27,923 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:39:52,351 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.324595
+2025-03-24 15:39:52,352 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:24:22
+2025-03-24 15:39:52,353 - INFO - allennlp.training.trainer - Epoch 85/99
+2025-03-24 15:39:52,353 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:39:52,757 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:39:52,758 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:39:52,758 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:39:52,758 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:39:52,759 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:39:52,759 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:39:52,759 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:39:52,772 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'C_2', 'g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'N_0', 'C_2', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_tan', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'C_2', 'g_minus', 'V_1', 'V_2']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_sin', 'V_1', 'g_divide', 'N_1', 'V_2']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_equal', 'C_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_half', 'C_2', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']]]
+2025-03-24 15:41:10,050 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:41:20,308 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:41:20,309 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:41:20,310 - INFO - allennlp.training.tensorboard_writer - loss            |     0.019  |     1.393
+2025-03-24 15:41:20,311 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:41:20,312 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:41:20,312 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:41:20,312 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.726
+2025-03-24 15:41:20,313 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:41:20,313 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:41:20,313 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.111
+2025-03-24 15:41:20,314 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.640
+2025-03-24 15:41:20,314 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:41:20,314 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:41:44,757 - INFO - allennlp.training.trainer - Epoch duration: 0:01:52.404633
+2025-03-24 15:41:44,758 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:22:47
+2025-03-24 15:41:44,758 - INFO - allennlp.training.trainer - Epoch 86/99
+2025-03-24 15:41:44,759 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:41:45,127 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:41:45,128 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:41:45,128 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:41:45,143 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:43:06,361 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:43:16,572 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:43:16,572 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:43:16,573 - INFO - allennlp.training.tensorboard_writer - loss            |     0.018  |     1.396
+2025-03-24 15:43:16,574 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:43:16,575 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:43:16,575 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:43:16,575 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:43:16,576 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:43:16,576 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:43:16,576 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.115
+2025-03-24 15:43:16,577 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.644
+2025-03-24 15:43:16,578 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:43:16,578 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:43:40,417 - INFO - allennlp.training.trainer - Epoch duration: 0:01:55.658674
+2025-03-24 15:43:40,418 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:21:12
+2025-03-24 15:43:40,418 - INFO - allennlp.training.trainer - Epoch 87/99
+2025-03-24 15:43:40,418 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:43:40,775 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:43:40,776 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:43:40,776 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:43:40,790 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_circle_area', 'N_0']], [['g_double', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_0', 'V_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'g_double', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1']], [['g_bili', 'C_5', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'V_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'cal_cone', 'V_0', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'cal_circle_area', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_half', 'N_0', 'cal_cone', 'V_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_2', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_equal', 'N_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_minus', 'N_2', 'N_2', 'g_add', 'V_0', 'N_1', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_half', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]]
+2025-03-24 15:44:54,799 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:45:03,521 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:45:03,522 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:45:03,523 - INFO - allennlp.training.tensorboard_writer - loss            |     0.017  |     1.397
+2025-03-24 15:45:03,523 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:45:03,523 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:45:03,525 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:45:03,525 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:45:03,526 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:45:03,526 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:45:03,526 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.112
+2025-03-24 15:45:03,527 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.647
+2025-03-24 15:45:03,527 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:45:03,528 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:45:27,848 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.430111
+2025-03-24 15:45:27,849 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:19:35
+2025-03-24 15:45:27,849 - INFO - allennlp.training.trainer - Epoch 88/99
+2025-03-24 15:45:27,849 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:45:28,202 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:45:28,216 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_4', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'N_0', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_cos', 'V_0', 'g_mul', 'V_0', 'V_1']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]]
+selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_half', 'C_3', 'g_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_cos', 'N_0', 'g_divide', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2', 'gougu_add', 'V_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1', 'gougu_add', 'V_0', 'N_2']]]
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_2', 'N_3', 'g_tan', 'N_0', 'g_mul', 'V_0', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_1', 'g_bili', 'V_1', 'N_2', 'N_1', 'gougu_minus', 'V_2', 'N_3']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]]
+2025-03-24 15:46:43,035 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:46:51,809 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:46:51,810 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:46:51,811 - INFO - allennlp.training.tensorboard_writer - loss            |     0.017  |     1.394
+2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.725
+2025-03-24 15:46:51,813 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:46:51,814 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:46:51,814 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.116
+2025-03-24 15:46:51,814 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.644
+2025-03-24 15:46:51,815 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:46:51,815 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:47:15,616 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.766767
+2025-03-24 15:47:15,617 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:17:59
+2025-03-24 15:47:15,617 - INFO - allennlp.training.trainer - Epoch 89/99
+2025-03-24 15:47:15,617 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:47:15,951 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:47:15,952 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:47:15,952 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:47:15,952 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:47:15,953 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:47:15,953 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:47:15,953 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:47:15,966 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_half', 'C_3', 'g_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'V_0', 'V_2']], [['gougu_add', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2', 'gougu_add', 'V_1', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']]]
+2025-03-24 15:48:31,214 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:48:39,959 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:48:39,960 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:48:39,960 - INFO - allennlp.training.tensorboard_writer - loss            |     0.017  |     1.401
+2025-03-24 15:48:39,960 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:48:39,961 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:48:39,961 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:48:39,961 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.723
+2025-03-24 15:48:39,963 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:48:39,963 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:48:39,963 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 15:48:39,964 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.635
+2025-03-24 15:48:39,964 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:48:39,965 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:49:03,948 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.330674
+2025-03-24 15:49:03,948 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:16:22
+2025-03-24 15:49:03,949 - INFO - allennlp.training.trainer - Epoch 90/99
+2025-03-24 15:49:03,949 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:49:04,318 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:49:04,333 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'C_2', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0', 'g_add', 'V_0', 'V_1']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]]
+2025-03-24 15:50:18,618 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:50:27,374 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:50:27,375 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:50:27,376 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.402
+2025-03-24 15:50:27,376 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:50:27,376 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:50:27,377 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:50:27,377 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:50:27,378 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:50:27,378 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:50:27,379 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 15:50:27,379 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.642
+2025-03-24 15:50:27,379 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:50:27,380 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:50:51,348 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.399855
+2025-03-24 15:50:51,349 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:14:44
+2025-03-24 15:50:51,349 - INFO - allennlp.training.trainer - Epoch 91/99
+2025-03-24 15:50:51,350 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:50:51,713 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:50:51,714 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:50:51,714 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:50:51,714 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:50:51,715 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:50:51,715 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:50:51,715 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:50:51,729 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_minus', 'V_1', 'V_0', 'g_minus', 'V_1', 'V_2']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]]
+2025-03-24 15:52:05,803 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:52:14,552 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:52:14,552 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:52:14,552 - INFO - allennlp.training.tensorboard_writer - loss            |     0.016  |     1.409
+2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.726
+2025-03-24 15:52:14,555 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:52:14,555 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:52:14,555 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 15:52:14,556 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.638
+2025-03-24 15:52:14,556 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:52:14,556 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:52:37,802 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.452376
+2025-03-24 15:52:37,802 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:13:07
+2025-03-24 15:52:37,803 - INFO - allennlp.training.trainer - Epoch 92/99
+2025-03-24 15:52:37,803 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:52:38,158 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:52:38,159 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:52:38,159 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:52:38,174 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:53:52,268 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:54:01,017 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:54:01,018 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:54:01,018 - INFO - allennlp.training.tensorboard_writer - loss            |     0.016  |     1.410
+2025-03-24 15:54:01,019 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:54:01,019 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:54:01,020 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:54:01,020 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.729
+2025-03-24 15:54:01,021 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:54:01,021 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:54:01,021 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.126
+2025-03-24 15:54:01,022 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.633
+2025-03-24 15:54:01,022 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:54:01,023 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:54:27,389 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.585760
+2025-03-24 15:54:27,391 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:11:29
+2025-03-24 15:54:27,391 - INFO - allennlp.training.trainer - Epoch 93/99
+2025-03-24 15:54:27,392 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:54:27,742 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:54:27,743 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:54:27,764 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:55:42,835 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:55:51,645 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:55:51,645 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:55:51,646 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.411
+2025-03-24 15:55:51,646 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:55:51,646 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:55:51,648 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:55:51,648 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.729
+2025-03-24 15:55:51,649 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:55:51,649 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:55:51,649 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.122
+2025-03-24 15:55:51,650 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.637
+2025-03-24 15:55:51,650 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:55:51,651 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:56:16,470 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.078993
+2025-03-24 15:56:16,471 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:09:51
+2025-03-24 15:56:16,471 - INFO - allennlp.training.trainer - Epoch 94/99
+2025-03-24 15:56:16,471 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:56:16,858 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:56:16,859 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:56:16,859 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:56:16,876 - INFO - allennlp.training.trainer - Training
+2025-03-24 15:57:32,201 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:57:41,037 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:57:41,038 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:57:41,039 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.411
+2025-03-24 15:57:41,039 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:57:41,040 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.728
+2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:57:41,042 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 15:57:41,043 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.637
+2025-03-24 15:57:41,043 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:57:41,043 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:58:05,868 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.396995
+2025-03-24 15:58:05,869 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:08:13
+2025-03-24 15:58:05,869 - INFO - allennlp.training.trainer - Epoch 95/99
+2025-03-24 15:58:05,869 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:58:06,283 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:58:06,284 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:58:06,284 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:58:06,300 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_minus', 'V_1', 'V_0', 'g_minus', 'V_1', 'V_2']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]]
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_divide', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_minus', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]]
+2025-03-24 15:59:21,191 - INFO - allennlp.training.trainer - Validating
+2025-03-24 15:59:30,059 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 15:59:30,059 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 15:59:30,060 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.412
+2025-03-24 15:59:30,061 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 15:59:30,062 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 15:59:30,063 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 15:59:30,063 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 15:59:30,063 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 15:59:30,064 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 15:59:30,064 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.122
+2025-03-24 15:59:30,065 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.639
+2025-03-24 15:59:30,065 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 15:59:30,066 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 15:59:53,072 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.203361
+2025-03-24 15:59:53,073 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:06:35
+2025-03-24 15:59:53,073 - INFO - allennlp.training.trainer - Epoch 96/99
+2025-03-24 15:59:53,074 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 15:59:53,499 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 15:59:53,500 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 15:59:53,500 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 15:59:53,500 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 15:59:53,501 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 15:59:53,501 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 15:59:53,501 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 15:59:53,523 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_2', 'N_3', 'g_tan', 'N_0', 'g_mul', 'V_0', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_bili', 'V_1', 'N_2', 'N_1', 'gougu_minus', 'V_2', 'N_3']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]]
+2025-03-24 16:01:11,210 - INFO - allennlp.training.trainer - Validating
+2025-03-24 16:01:20,012 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 16:01:20,012 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 16:01:20,013 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.416
+2025-03-24 16:01:20,013 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 16:01:20,014 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.730
+2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 16:01:20,016 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.117
+2025-03-24 16:01:20,016 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.640
+2025-03-24 16:01:20,017 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 16:01:20,017 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 16:01:43,584 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.510637
+2025-03-24 16:01:43,585 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:04:56
+2025-03-24 16:01:43,585 - INFO - allennlp.training.trainer - Epoch 97/99
+2025-03-24 16:01:43,585 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 16:01:43,934 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 16:01:43,935 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 16:01:43,950 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_circle_area', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'g_double', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1']], [['g_bili', 'C_5', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'V_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'cal_cone', 'V_0', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'cal_circle_area', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_half', 'N_0', 'cal_cone', 'V_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'C_4', 'cal_cone', 'V_0', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'V_0', 'C_4']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_2', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']]]
+selected_programs [[['g_half', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_mul', 'V_1', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_add', 'N_0', 'C_2', 'g_half', 'V_0', 'g_minus', 'C_3', 'C_2', 'g_minus', 'V_2', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_half', 'V_0', 'g_add', 'V_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'N_0', 'g_add', 'V_1', 'V_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'V_0', 'N_2', 'gougu_minus', 'V_1', 'V_0', 'g_divide', 'V_2', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]]
+2025-03-24 16:03:04,087 - INFO - allennlp.training.trainer - Validating
+2025-03-24 16:03:14,310 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 16:03:14,311 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 16:03:14,312 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.422
+2025-03-24 16:03:14,313 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 16:03:14,313 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 16:03:14,314 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 16:03:14,314 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.730
+2025-03-24 16:03:14,314 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 16:03:14,315 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 16:03:14,315 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.116
+2025-03-24 16:03:14,316 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.640
+2025-03-24 16:03:14,316 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 16:03:14,317 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 16:03:33,659 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.074599
+2025-03-24 16:03:33,660 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:03:18
+2025-03-24 16:03:33,661 - INFO - allennlp.training.trainer - Epoch 98/99
+2025-03-24 16:03:33,661 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 16:03:34,011 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 16:03:34,012 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 16:03:34,012 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 16:03:34,027 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'C_2', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 16:04:49,386 - INFO - allennlp.training.trainer - Validating
+2025-03-24 16:04:58,283 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 16:04:58,284 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 16:04:58,284 - INFO - allennlp.training.tensorboard_writer - loss            |     0.015  |     1.418
+2025-03-24 16:04:58,285 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 16:04:58,285 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 16:04:58,286 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 16:04:58,287 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.729
+2025-03-24 16:04:58,287 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 16:04:58,287 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 16:04:58,288 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.119
+2025-03-24 16:04:58,288 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.638
+2025-03-24 16:04:58,289 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 16:04:58,289 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 16:05:21,126 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.464991
+2025-03-24 16:05:21,126 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:01:39
+2025-03-24 16:05:21,127 - INFO - allennlp.training.trainer - Epoch 99/99
+2025-03-24 16:05:21,127 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668
+2025-03-24 16:05:21,499 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5
+2025-03-24 16:05:21,500 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5
+2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5
+2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5
+2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662
+2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5
+2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5
+2025-03-24 16:05:21,514 - INFO - allennlp.training.trainer - Training
+selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'C_2', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]]
+2025-03-24 16:06:37,015 - INFO - allennlp.training.trainer - Validating
+2025-03-24 16:06:45,843 - INFO - allennlp.training.tensorboard_writer -                     Training |  Validation
+2025-03-24 16:06:45,843 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB |     5.000  |       N/A
+2025-03-24 16:06:45,843 - INFO - allennlp.training.tensorboard_writer - loss            |     0.014  |     1.417
+2025-03-24 16:06:45,844 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB |     5.000  |       N/A
+2025-03-24 16:06:45,845 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB |  17662.000  |       N/A
+2025-03-24 16:06:45,845 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB |     5.000  |       N/A
+2025-03-24 16:06:45,845 - INFO - allennlp.training.tensorboard_writer - BLEU            |       N/A  |     0.727
+2025-03-24 16:06:45,846 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB   |  4772.668  |       N/A
+2025-03-24 16:06:45,846 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB |     5.000  |       N/A
+2025-03-24 16:06:45,847 - INFO - allennlp.training.tensorboard_writer - no_result       |     0.000  |     0.118
+2025-03-24 16:06:45,847 - INFO - allennlp.training.tensorboard_writer - acc             |     0.000  |     0.642
+2025-03-24 16:06:45,848 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB |     5.000  |       N/A
+2025-03-24 16:06:45,848 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB |     5.000  |       N/A
+2025-03-24 16:07:07,315 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.187999
+2025-03-24 16:07:07,357 - INFO - allennlp.training.checkpointer - loading best weights
+2025-03-24 16:07:08,001 - INFO - allennlp.commands.train - The model will be evaluated using the best epoch weights.
+2025-03-24 16:07:08,004 - INFO - allennlp.training.util - Iterating over dataset
+selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'C_1', 'g_sin', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_add', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_1', 'g_minus', 'V_2', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]]
diff --git a/vocabulary/non_padded_namespaces.txt b/vocabulary/non_padded_namespaces.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d5dee50c4400824e195a609940a9f9c9abad69b5
--- /dev/null
+++ b/vocabulary/non_padded_namespaces.txt
@@ -0,0 +1,2 @@
+*tags
+*labels
diff --git a/vocabulary/tokens.txt b/vocabulary/tokens.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87dd25151a4bbf27df5c854daba4c88a934df1f3
--- /dev/null
+++ b/vocabulary/tokens.txt
@@ -0,0 +1,43 @@
+@@UNKNOWN@@
+N_0
+@start@
+@end@
+V_0
+g_minus
+N_1
+g_half
+V_1
+C_3
+g_double
+N_2
+g_add
+C_2
+g_divide
+g_bili
+g_mul
+V_2
+gougu_minus
+gougu_add
+g_equal
+C_4
+g_sin
+N_3
+g_tan
+cal_circle_area
+cal_cone
+N_4
+g_cos
+C_1
+N_5
+C_5
+C_0
+N_6
+N_7
+cal_circle_perimeter
+N_8
+N_9
+g_asin
+N_11
+g_acos
+N_10
+C_6