diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52364b0e92698cf48671e2f03bed82c10ef14804 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +best.th filter=lfs diff=lfs merge=lfs -text diff --git a/best.th b/best.th new file mode 100644 index 0000000000000000000000000000000000000000..3b7ae8a69bc4faa4bc00b4e3b98e802af25ee256 --- /dev/null +++ b/best.th @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9018ae76c648daff6f3de5d7cb1870fca81537c9024df37e5f77001de0249869 +size 1169366338 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7db64d1c3a923223751a9aba9aa5bb63d5b6eb62 --- /dev/null +++ b/config.json @@ -0,0 +1,109 @@ +{ + "dataset_reader": { + "type": "s2s_manual_reader", + "source_token_indexer": { + "tokens": { + "type": "pretrained_transformer", + "do_lowercase": false, + "model_name": "./roberta" + } + }, + "target_token_indexer": { + "tokens": { + "type": "single_id" + } + }, + "tokenizer": { + "word_splitter": { + "type": "just_spaces" + } + } + }, + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "model": { + "type": "geo_s2s", + "beam_size": 10, + "encoder": { + "dropout": 0.5, + "emb_dim": 768, + "hid_dim": 512, + "input_dim": 21128 + }, + "knowledge_points_ratio": 0, + "max_decoding_steps": 16, + "resnet_pretrained": "./", + "scheduled_sampling_ratio": 0, + "source_embedder": { + "token_embedders": {} + }, + "target_embedding_dim": 512 + }, + "train_data_path": "./GeoQA-Data/GeoQA-Pro/pro_train.pk", + "validation_data_path": "./GeoQA-Data/GeoQA-Pro/pro_dev.pk", + "test_data_path": "./GeoQA-Data/GeoQA-Pro/pro_test.pk", + "trainer": { + "cuda_device": 0, + "grad_norm": 10, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.6, + "mode": "max", + "patience": 5 + }, + "num_epochs": 100, + "optimizer": { + "type": "adam", + "lr": 0.001, + "parameter_groups": [ + [ + [ + "mcan", + "merge_att", + "channel_transform", + "attflat_img", + "attflat_lang", + "decode_transform" + ], + { + "lr": 1e-05 + } + ], + [ + [ + "resnet" + ], + { + "lr": 1e-05 + } + ], + [ + [ + "source_embedder", + "encoder.embedding" + ], + { + "lr": 2e-05 + } + ], + [ + [ + "encoder.concat_trans", + "encoder.concat_trans_", + "encoder.lstm_embedding", + "encoder.trans", + "encoder.norm", + "encoder.concat_norm" + ], + { + "lr": 0.001 + } + ] + ] + }, + "validation_metric": "+acc" + }, + "evaluate_on_test": true +} \ No newline at end of file diff --git a/log/train/events.out.tfevents.1742793704.amax b/log/train/events.out.tfevents.1742793704.amax new file mode 100644 index 0000000000000000000000000000000000000000..2fb0d005fca123cb78d6ac07c9cf7b4497d6c24d --- /dev/null +++ b/log/train/events.out.tfevents.1742793704.amax @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1563426608092d5cb56619581eb9df3a627fdca704c262ae175dcb876795bbf0 +size 31502604 diff --git a/log/validation/events.out.tfevents.1742793704.amax b/log/validation/events.out.tfevents.1742793704.amax new file mode 100644 index 0000000000000000000000000000000000000000..1d11d6c6f390fec9d7b05e177ce6c01690e00fbf --- /dev/null +++ b/log/validation/events.out.tfevents.1742793704.amax @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490c22130c5c3b3a371839fb3d576de6f3a6d134ad253c8feb5bc24a277133eb +size 17240 diff --git a/metrics.json b/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..7ba33a5245790f0d49ea5dc46238bf3ce3182d92 --- /dev/null +++ b/metrics.json @@ -0,0 +1,38 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:45:01.064397", + "training_start_epoch": 0, + "training_epochs": 99, + "epoch": 99, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.013839077864858237, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7266542542114865, + "validation_acc": 0.6417824074074074, + "validation_no_result": 0.11848958333333333, + "validation_loss": 1.416664329667886, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262, + "test_BLEU": 0.704433949149771, + "test_acc": 0.6443865740740741, + "test_no_result": 0.11559606481481481, + "test_loss": 1.2252581814924877 +} \ No newline at end of file diff --git a/metrics_epoch_0.json b/metrics_epoch_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6be12a07115514826be628c7c5127417cc1119f1 --- /dev/null +++ b/metrics_epoch_0.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 0, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 1974, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:01:29.098765", + "training_start_epoch": 0, + "training_epochs": 0, + "epoch": 0, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 1.6868594841523603, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 1974, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.23317665829903492, + "validation_acc": 0.36675347222222227, + "validation_no_result": 0.19661458333333334, + "validation_loss": 1.2027941942214966, + "best_validation_BLEU": 0.23317665829903492, + "best_validation_acc": 0.36675347222222227, + "best_validation_no_result": 0.19661458333333334, + "best_validation_loss": 1.2027941942214966 +} \ No newline at end of file diff --git a/metrics_epoch_1.json b/metrics_epoch_1.json new file mode 100644 index 0000000000000000000000000000000000000000..584e458b0eb0dd2019fa5b1a1a2352e13412f762 --- /dev/null +++ b/metrics_epoch_1.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 1, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17628, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:02:54.787197", + "training_start_epoch": 0, + "training_epochs": 1, + "epoch": 1, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 1.1222382789308374, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17628, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.35292654895689146, + "validation_acc": 0.42476851851851855, + "validation_no_result": 0.16666666666666666, + "validation_loss": 1.0453250408172607, + "best_validation_BLEU": 0.35292654895689146, + "best_validation_acc": 0.42476851851851855, + "best_validation_no_result": 0.16666666666666666, + "best_validation_loss": 1.0453250408172607 +} \ No newline at end of file diff --git a/metrics_epoch_10.json b/metrics_epoch_10.json new file mode 100644 index 0000000000000000000000000000000000000000..2f8ff654d183ffb2019bcbf027eb7377a7d05fe2 --- /dev/null +++ b/metrics_epoch_10.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 10, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:15:48.319747", + "training_start_epoch": 0, + "training_epochs": 10, + "epoch": 10, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.6800853114236485, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.43079156908061556, + "validation_acc": 0.5237268518518519, + "validation_no_result": 0.1205150462962963, + "validation_loss": 0.7902998005350431, + "best_validation_BLEU": 0.43079156908061556, + "best_validation_acc": 0.5237268518518519, + "best_validation_no_result": 0.1205150462962963, + "best_validation_loss": 0.7902998005350431 +} \ No newline at end of file diff --git a/metrics_epoch_11.json b/metrics_epoch_11.json new file mode 100644 index 0000000000000000000000000000000000000000..175dca4dd64a9ecd1b6066a64f9de0d58b2d411c --- /dev/null +++ b/metrics_epoch_11.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 10, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:17:16.634014", + "training_start_epoch": 0, + "training_epochs": 11, + "epoch": 11, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.6558576405048371, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.407816076308465, + "validation_acc": 0.4982638888888889, + "validation_no_result": 0.11791087962962964, + "validation_loss": 0.7638036062320074, + "best_validation_BLEU": 0.43079156908061556, + "best_validation_acc": 0.5237268518518519, + "best_validation_no_result": 0.1205150462962963, + "best_validation_loss": 0.7902998005350431 +} \ No newline at end of file diff --git a/metrics_epoch_12.json b/metrics_epoch_12.json new file mode 100644 index 0000000000000000000000000000000000000000..b170da79384ce5f14aafc90fc6abfedb60d63952 --- /dev/null +++ b/metrics_epoch_12.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 10, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:18:44.602728", + "training_start_epoch": 0, + "training_epochs": 12, + "epoch": 12, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.631023341959173, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.4312758899332716, + "validation_acc": 0.5112847222222222, + "validation_no_result": 0.1127025462962963, + "validation_loss": 0.766788254181544, + "best_validation_BLEU": 0.43079156908061556, + "best_validation_acc": 0.5237268518518519, + "best_validation_no_result": 0.1205150462962963, + "best_validation_loss": 0.7902998005350431 +} \ No newline at end of file diff --git a/metrics_epoch_13.json b/metrics_epoch_13.json new file mode 100644 index 0000000000000000000000000000000000000000..354323d9a7b342c018c865bb8ed96277c613252b --- /dev/null +++ b/metrics_epoch_13.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 13, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:20:12.304651", + "training_start_epoch": 0, + "training_epochs": 13, + "epoch": 13, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.590884352272207, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5037565205317504, + "validation_acc": 0.5601851851851852, + "validation_no_result": 0.09577546296296297, + "validation_loss": 0.7520468930403391, + "best_validation_BLEU": 0.5037565205317504, + "best_validation_acc": 0.5601851851851852, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7520468930403391 +} \ No newline at end of file diff --git a/metrics_epoch_14.json b/metrics_epoch_14.json new file mode 100644 index 0000000000000000000000000000000000000000..d8f1b1744494fbe4167173b91b33d668210b9e55 --- /dev/null +++ b/metrics_epoch_14.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 14, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:21:39.478923", + "training_start_epoch": 0, + "training_epochs": 14, + "epoch": 14, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.5659671618179841, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5427631598053875, + "validation_acc": 0.5784143518518519, + "validation_no_result": 0.09577546296296297, + "validation_loss": 0.7350192765394846, + "best_validation_BLEU": 0.5427631598053875, + "best_validation_acc": 0.5784143518518519, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7350192765394846 +} \ No newline at end of file diff --git a/metrics_epoch_15.json b/metrics_epoch_15.json new file mode 100644 index 0000000000000000000000000000000000000000..3a28ee29aac48169191967a7de8c9696fa397863 --- /dev/null +++ b/metrics_epoch_15.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 14, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:23:06.755342", + "training_start_epoch": 0, + "training_epochs": 15, + "epoch": 15, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.5331384379755367, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5071943127904087, + "validation_acc": 0.5536747685185185, + "validation_no_result": 0.10228587962962964, + "validation_loss": 0.7253126613795757, + "best_validation_BLEU": 0.5427631598053875, + "best_validation_acc": 0.5784143518518519, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7350192765394846 +} \ No newline at end of file diff --git a/metrics_epoch_16.json b/metrics_epoch_16.json new file mode 100644 index 0000000000000000000000000000000000000000..2c567cacaff57e650b5cc21790b1c9377ba686d4 --- /dev/null +++ b/metrics_epoch_16.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 14, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:24:34.104787", + "training_start_epoch": 0, + "training_epochs": 16, + "epoch": 16, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.509379303726283, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5108672141994016, + "validation_acc": 0.5536747685185185, + "validation_no_result": 0.109375, + "validation_loss": 0.7373132693270842, + "best_validation_BLEU": 0.5427631598053875, + "best_validation_acc": 0.5784143518518519, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7350192765394846 +} \ No newline at end of file diff --git a/metrics_epoch_17.json b/metrics_epoch_17.json new file mode 100644 index 0000000000000000000000000000000000000000..12997471217567dcbaa387e077b6049ec2118392 --- /dev/null +++ b/metrics_epoch_17.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 14, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:26:00.758793", + "training_start_epoch": 0, + "training_epochs": 17, + "epoch": 17, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.485344271497293, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5599993581570306, + "validation_acc": 0.5706018518518519, + "validation_no_result": 0.09186921296296297, + "validation_loss": 0.7641393554707369, + "best_validation_BLEU": 0.5427631598053875, + "best_validation_acc": 0.5784143518518519, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7350192765394846 +} \ No newline at end of file diff --git a/metrics_epoch_18.json b/metrics_epoch_18.json new file mode 100644 index 0000000000000000000000000000000000000000..c6c02efe3316ebc09e67706419d0136a8dd49ca7 --- /dev/null +++ b/metrics_epoch_18.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 14, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:27:31.038986", + "training_start_epoch": 0, + "training_epochs": 18, + "epoch": 18, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.46907018016685137, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5504686593347489, + "validation_acc": 0.5653935185185185, + "validation_no_result": 0.12181712962962964, + "validation_loss": 0.772665457179149, + "best_validation_BLEU": 0.5427631598053875, + "best_validation_acc": 0.5784143518518519, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7350192765394846 +} \ No newline at end of file diff --git a/metrics_epoch_19.json b/metrics_epoch_19.json new file mode 100644 index 0000000000000000000000000000000000000000..0281c2b88bf5ba412f6caaf80ac5be25767a1676 --- /dev/null +++ b/metrics_epoch_19.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 14, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:28:55.198844", + "training_start_epoch": 0, + "training_epochs": 19, + "epoch": 19, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.44485945268110794, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5366820134380189, + "validation_acc": 0.5679976851851852, + "validation_no_result": 0.09447337962962964, + "validation_loss": 0.7757748365402222, + "best_validation_BLEU": 0.5427631598053875, + "best_validation_acc": 0.5784143518518519, + "best_validation_no_result": 0.09577546296296297, + "best_validation_loss": 0.7350192765394846 +} \ No newline at end of file diff --git a/metrics_epoch_2.json b/metrics_epoch_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e0f270c5311e90932c2dee20c56f639d53e2687d --- /dev/null +++ b/metrics_epoch_2.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 1, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:04:23.165231", + "training_start_epoch": 0, + "training_epochs": 2, + "epoch": 2, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 1.0030749413100155, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.28082488387199267, + "validation_acc": 0.41753472222222227, + "validation_no_result": 0.17447916666666666, + "validation_loss": 0.9994873702526093, + "best_validation_BLEU": 0.35292654895689146, + "best_validation_acc": 0.42476851851851855, + "best_validation_no_result": 0.16666666666666666, + "best_validation_loss": 1.0453250408172607 +} \ No newline at end of file diff --git a/metrics_epoch_20.json b/metrics_epoch_20.json new file mode 100644 index 0000000000000000000000000000000000000000..69d4c7f9e87961cfa225e4a2b890e622ed6559a8 --- /dev/null +++ b/metrics_epoch_20.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 20, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:30:24.957092", + "training_start_epoch": 0, + "training_epochs": 20, + "epoch": 20, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.4409686741503802, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5588327729022262, + "validation_acc": 0.5817418981481481, + "validation_no_result": 0.10358796296296297, + "validation_loss": 0.7639288852612177, + "best_validation_BLEU": 0.5588327729022262, + "best_validation_acc": 0.5817418981481481, + "best_validation_no_result": 0.10358796296296297, + "best_validation_loss": 0.7639288852612177 +} \ No newline at end of file diff --git a/metrics_epoch_21.json b/metrics_epoch_21.json new file mode 100644 index 0000000000000000000000000000000000000000..bf219935569453e770cbfc8d340849978e6aa6d1 --- /dev/null +++ b/metrics_epoch_21.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 21, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:31:50.125831", + "training_start_epoch": 0, + "training_epochs": 21, + "epoch": 21, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.4123601945963773, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6021588888536067, + "validation_acc": 0.5875289351851852, + "validation_no_result": 0.10228587962962964, + "validation_loss": 0.77462221433719, + "best_validation_BLEU": 0.6021588888536067, + "best_validation_acc": 0.5875289351851852, + "best_validation_no_result": 0.10228587962962964, + "best_validation_loss": 0.77462221433719 +} \ No newline at end of file diff --git a/metrics_epoch_22.json b/metrics_epoch_22.json new file mode 100644 index 0000000000000000000000000000000000000000..87b80003a4e15eb2442553dfdb4cbdc4683515a0 --- /dev/null +++ b/metrics_epoch_22.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 22, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:33:17.006857", + "training_start_epoch": 0, + "training_epochs": 22, + "epoch": 22, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.38905653899366205, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6125124234113125, + "validation_acc": 0.5992476851851852, + "validation_no_result": 0.0970775462962963, + "validation_loss": 0.7628330377240976, + "best_validation_BLEU": 0.6125124234113125, + "best_validation_acc": 0.5992476851851852, + "best_validation_no_result": 0.0970775462962963, + "best_validation_loss": 0.7628330377240976 +} \ No newline at end of file diff --git a/metrics_epoch_23.json b/metrics_epoch_23.json new file mode 100644 index 0000000000000000000000000000000000000000..3b87dceed41fc94b0ed1d50f8a867244fd9daae2 --- /dev/null +++ b/metrics_epoch_23.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 22, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:34:40.839620", + "training_start_epoch": 0, + "training_epochs": 23, + "epoch": 23, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.3673159715804187, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6080388160655757, + "validation_acc": 0.5986689814814815, + "validation_no_result": 0.1166087962962963, + "validation_loss": 0.8231359881659349, + "best_validation_BLEU": 0.6125124234113125, + "best_validation_acc": 0.5992476851851852, + "best_validation_no_result": 0.0970775462962963, + "best_validation_loss": 0.7628330377240976 +} \ No newline at end of file diff --git a/metrics_epoch_24.json b/metrics_epoch_24.json new file mode 100644 index 0000000000000000000000000000000000000000..dbc16c584831de3cb25ece4c6150dddd2a805ebe --- /dev/null +++ b/metrics_epoch_24.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 22, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:36:03.591584", + "training_start_epoch": 0, + "training_epochs": 24, + "epoch": 24, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.3589384977113117, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6044499896265613, + "validation_acc": 0.5985243055555556, + "validation_no_result": 0.10951967592592593, + "validation_loss": 0.8028569320837656, + "best_validation_BLEU": 0.6125124234113125, + "best_validation_acc": 0.5992476851851852, + "best_validation_no_result": 0.0970775462962963, + "best_validation_loss": 0.7628330377240976 +} \ No newline at end of file diff --git a/metrics_epoch_25.json b/metrics_epoch_25.json new file mode 100644 index 0000000000000000000000000000000000000000..b444d745410db4b71e8412231c12e700cee8837f --- /dev/null +++ b/metrics_epoch_25.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 25, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:37:26.154131", + "training_start_epoch": 0, + "training_epochs": 25, + "epoch": 25, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.34136158214374024, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.5987859616867737, + "validation_acc": 0.6005497685185185, + "validation_no_result": 0.11863425925925926, + "validation_loss": 0.8074321200450262, + "best_validation_BLEU": 0.5987859616867737, + "best_validation_acc": 0.6005497685185185, + "best_validation_no_result": 0.11863425925925926, + "best_validation_loss": 0.8074321200450262 +} \ No newline at end of file diff --git a/metrics_epoch_26.json b/metrics_epoch_26.json new file mode 100644 index 0000000000000000000000000000000000000000..504be34b77388dca5a48b3177ecd0c29df99555a --- /dev/null +++ b/metrics_epoch_26.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 25, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:38:48.554449", + "training_start_epoch": 0, + "training_epochs": 26, + "epoch": 26, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.3194368757984855, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6175642128698793, + "validation_acc": 0.5966435185185185, + "validation_no_result": 0.11082175925925926, + "validation_loss": 0.8277938465277354, + "best_validation_BLEU": 0.5987859616867737, + "best_validation_acc": 0.6005497685185185, + "best_validation_no_result": 0.11863425925925926, + "best_validation_loss": 0.8074321200450262 +} \ No newline at end of file diff --git a/metrics_epoch_27.json b/metrics_epoch_27.json new file mode 100644 index 0000000000000000000000000000000000000000..94d86b318c281a1ea06f0aff9a35844eb835a3b2 --- /dev/null +++ b/metrics_epoch_27.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 27, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:40:11.798522", + "training_start_epoch": 0, + "training_epochs": 27, + "epoch": 27, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.30391626872799615, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6246666258278749, + "validation_acc": 0.6031539351851852, + "validation_no_result": 0.12514467592592593, + "validation_loss": 0.8511058837175369, + "best_validation_BLEU": 0.6246666258278749, + "best_validation_acc": 0.6031539351851852, + "best_validation_no_result": 0.12514467592592593, + "best_validation_loss": 0.8511058837175369 +} \ No newline at end of file diff --git a/metrics_epoch_28.json b/metrics_epoch_28.json new file mode 100644 index 0000000000000000000000000000000000000000..951878495cae78b0058e8c4d4caf13630ec461fc --- /dev/null +++ b/metrics_epoch_28.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 27, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:41:35.626352", + "training_start_epoch": 0, + "training_epochs": 28, + "epoch": 28, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.29144989455288106, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.648115855550148, + "validation_acc": 0.6018518518518519, + "validation_no_result": 0.1087962962962963, + "validation_loss": 0.8601374352971712, + "best_validation_BLEU": 0.6246666258278749, + "best_validation_acc": 0.6031539351851852, + "best_validation_no_result": 0.12514467592592593, + "best_validation_loss": 0.8511058837175369 +} \ No newline at end of file diff --git a/metrics_epoch_29.json b/metrics_epoch_29.json new file mode 100644 index 0000000000000000000000000000000000000000..2adc13597a324ef933c59293850f606300c91b60 --- /dev/null +++ b/metrics_epoch_29.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 29, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:42:59.318964", + "training_start_epoch": 0, + "training_epochs": 29, + "epoch": 29, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.26861848641525615, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6362762956373789, + "validation_acc": 0.6129918981481481, + "validation_no_result": 0.11400462962962964, + "validation_loss": 0.8515521312753359, + "best_validation_BLEU": 0.6362762956373789, + "best_validation_acc": 0.6129918981481481, + "best_validation_no_result": 0.11400462962962964, + "best_validation_loss": 0.8515521312753359 +} \ No newline at end of file diff --git a/metrics_epoch_3.json b/metrics_epoch_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3974057b06fc5e141cb099530315ae13113ae65f --- /dev/null +++ b/metrics_epoch_3.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 1, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:05:51.190753", + "training_start_epoch": 0, + "training_epochs": 3, + "epoch": 3, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.9417703888633034, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.3121575177212757, + "validation_acc": 0.41681134259259256, + "validation_no_result": 0.17317708333333334, + "validation_loss": 0.9283205419778824, + "best_validation_BLEU": 0.35292654895689146, + "best_validation_acc": 0.42476851851851855, + "best_validation_no_result": 0.16666666666666666, + "best_validation_loss": 1.0453250408172607 +} \ No newline at end of file diff --git a/metrics_epoch_30.json b/metrics_epoch_30.json new file mode 100644 index 0000000000000000000000000000000000000000..c21f938414686c7f22bef218c4c7722688671874 --- /dev/null +++ b/metrics_epoch_30.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 29, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:44:36.049061", + "training_start_epoch": 0, + "training_epochs": 30, + "epoch": 30, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.25431940298188815, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6410739070257515, + "validation_acc": 0.5907118055555556, + "validation_no_result": 0.1043113425925926, + "validation_loss": 0.89767703662316, + "best_validation_BLEU": 0.6362762956373789, + "best_validation_acc": 0.6129918981481481, + "best_validation_no_result": 0.11400462962962964, + "best_validation_loss": 0.8515521312753359 +} \ No newline at end of file diff --git a/metrics_epoch_31.json b/metrics_epoch_31.json new file mode 100644 index 0000000000000000000000000000000000000000..eb886075633242cb78d508601db186714c3ffa2f --- /dev/null +++ b/metrics_epoch_31.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 29, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:45:58.919362", + "training_start_epoch": 0, + "training_epochs": 31, + "epoch": 31, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.2436353249983354, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6502029305796647, + "validation_acc": 0.6038773148148148, + "validation_no_result": 0.11921296296296297, + "validation_loss": 0.8545631468296051, + "best_validation_BLEU": 0.6362762956373789, + "best_validation_acc": 0.6129918981481481, + "best_validation_no_result": 0.11400462962962964, + "best_validation_loss": 0.8515521312753359 +} \ No newline at end of file diff --git a/metrics_epoch_32.json b/metrics_epoch_32.json new file mode 100644 index 0000000000000000000000000000000000000000..f9d46916dc92ca524517247a96e3cb6c77460893 --- /dev/null +++ b/metrics_epoch_32.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 29, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:47:27.298834", + "training_start_epoch": 0, + "training_epochs": 32, + "epoch": 32, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.24082526537505064, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6210247267996586, + "validation_acc": 0.5901331018518519, + "validation_no_result": 0.11921296296296297, + "validation_loss": 0.9147681097189585, + "best_validation_BLEU": 0.6362762956373789, + "best_validation_acc": 0.6129918981481481, + "best_validation_no_result": 0.11400462962962964, + "best_validation_loss": 0.8515521312753359 +} \ No newline at end of file diff --git a/metrics_epoch_33.json b/metrics_epoch_33.json new file mode 100644 index 0000000000000000000000000000000000000000..3a40cb1321edc0e78527214b77f808de2c863684 --- /dev/null +++ b/metrics_epoch_33.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 29, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:48:57.872746", + "training_start_epoch": 0, + "training_epochs": 33, + "epoch": 33, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.22514350820671428, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6454995678751785, + "validation_acc": 0.5941840277777778, + "validation_no_result": 0.1171875, + "validation_loss": 0.9150293692946434, + "best_validation_BLEU": 0.6362762956373789, + "best_validation_acc": 0.6129918981481481, + "best_validation_no_result": 0.11400462962962964, + "best_validation_loss": 0.8515521312753359 +} \ No newline at end of file diff --git a/metrics_epoch_34.json b/metrics_epoch_34.json new file mode 100644 index 0000000000000000000000000000000000000000..bd87d98412e38b872c64b03b21f73fda4ea0ae0b --- /dev/null +++ b/metrics_epoch_34.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 29, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:50:26.974149", + "training_start_epoch": 0, + "training_epochs": 34, + "epoch": 34, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.20833875049244274, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6765339411713064, + "validation_acc": 0.5921585648148148, + "validation_no_result": 0.11400462962962964, + "validation_loss": 0.9229913602272669, + "best_validation_BLEU": 0.6362762956373789, + "best_validation_acc": 0.6129918981481481, + "best_validation_no_result": 0.11400462962962964, + "best_validation_loss": 0.8515521312753359 +} \ No newline at end of file diff --git a/metrics_epoch_35.json b/metrics_epoch_35.json new file mode 100644 index 0000000000000000000000000000000000000000..45639dc05dbfe85da1746675f70b5d9c1eadc242 --- /dev/null +++ b/metrics_epoch_35.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 35, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:51:51.108377", + "training_start_epoch": 0, + "training_epochs": 35, + "epoch": 35, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.1996874829584902, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6651120785884496, + "validation_acc": 0.6176215277777778, + "validation_no_result": 0.09635416666666667, + "validation_loss": 0.9561783224344254, + "best_validation_BLEU": 0.6651120785884496, + "best_validation_acc": 0.6176215277777778, + "best_validation_no_result": 0.09635416666666667, + "best_validation_loss": 0.9561783224344254 +} \ No newline at end of file diff --git a/metrics_epoch_36.json b/metrics_epoch_36.json new file mode 100644 index 0000000000000000000000000000000000000000..16812b9ca985694d7b0449dd6aea6d4449d51aa3 --- /dev/null +++ b/metrics_epoch_36.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 36, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:53:37.984899", + "training_start_epoch": 0, + "training_epochs": 36, + "epoch": 36, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.19211301336234266, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6725576195507628, + "validation_acc": 0.6215277777777778, + "validation_no_result": 0.10677083333333333, + "validation_loss": 0.9550605937838554, + "best_validation_BLEU": 0.6725576195507628, + "best_validation_acc": 0.6215277777777778, + "best_validation_no_result": 0.10677083333333333, + "best_validation_loss": 0.9550605937838554 +} \ No newline at end of file diff --git a/metrics_epoch_37.json b/metrics_epoch_37.json new file mode 100644 index 0000000000000000000000000000000000000000..52cb8b6ad3abc2700fcabc16be3948374975808d --- /dev/null +++ b/metrics_epoch_37.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:55:23.433659", + "training_start_epoch": 0, + "training_epochs": 37, + "epoch": 37, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.17771621271967888, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6769016467773605, + "validation_acc": 0.6247106481481481, + "validation_no_result": 0.1087962962962963, + "validation_loss": 0.9989128684004148, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_38.json b/metrics_epoch_38.json new file mode 100644 index 0000000000000000000000000000000000000000..2c8053505e2d1f1ffe59e0d8e79da55082279127 --- /dev/null +++ b/metrics_epoch_38.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:57:10.320601", + "training_start_epoch": 0, + "training_epochs": 38, + "epoch": 38, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.1800003968179226, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6738853719385639, + "validation_acc": 0.6208043981481481, + "validation_no_result": 0.1166087962962963, + "validation_loss": 0.9844465777277946, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_39.json b/metrics_epoch_39.json new file mode 100644 index 0000000000000000000000000000000000000000..e2b3d7a00a09205640d59b47d308d9139c0d2c38 --- /dev/null +++ b/metrics_epoch_39.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:58:47.382407", + "training_start_epoch": 0, + "training_epochs": 39, + "epoch": 39, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.17429773367264054, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6865457621040599, + "validation_acc": 0.5914351851851852, + "validation_no_result": 0.1205150462962963, + "validation_loss": 1.013327990969022, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_4.json b/metrics_epoch_4.json new file mode 100644 index 0000000000000000000000000000000000000000..5d598cfe944965cce64dbe1cc99e3ef926d50b2a --- /dev/null +++ b/metrics_epoch_4.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 4, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:07:20.149041", + "training_start_epoch": 0, + "training_epochs": 4, + "epoch": 4, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.895405513048172, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.32383508127737887, + "validation_acc": 0.43171296296296297, + "validation_no_result": 0.1556712962962963, + "validation_loss": 0.9082479352752367, + "best_validation_BLEU": 0.32383508127737887, + "best_validation_acc": 0.43171296296296297, + "best_validation_no_result": 0.1556712962962963, + "best_validation_loss": 0.9082479352752367 +} \ No newline at end of file diff --git a/metrics_epoch_40.json b/metrics_epoch_40.json new file mode 100644 index 0000000000000000000000000000000000000000..cc626604b5de2a45d66e0465e6007f511c4166f4 --- /dev/null +++ b/metrics_epoch_40.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:00:11.059935", + "training_start_epoch": 0, + "training_epochs": 40, + "epoch": 40, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.17112242193384605, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6821643018761171, + "validation_acc": 0.6170428240740741, + "validation_no_result": 0.11588541666666667, + "validation_loss": 1.03222210953633, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_41.json b/metrics_epoch_41.json new file mode 100644 index 0000000000000000000000000000000000000000..54d3d1c1b2eda36dfadf70d4ec979f86c31887fd --- /dev/null +++ b/metrics_epoch_41.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:01:44.799608", + "training_start_epoch": 0, + "training_epochs": 41, + "epoch": 41, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.1595429590479894, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6684891541351873, + "validation_acc": 0.6077835648148148, + "validation_no_result": 0.12239583333333333, + "validation_loss": 1.0189014325539272, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_42.json b/metrics_epoch_42.json new file mode 100644 index 0000000000000000000000000000000000000000..ba07ccd1382c968334d0f59d863b854555ae067e --- /dev/null +++ b/metrics_epoch_42.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:03:09.898860", + "training_start_epoch": 0, + "training_epochs": 42, + "epoch": 42, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.15222934606400404, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6837890169037233, + "validation_acc": 0.6247106481481481, + "validation_no_result": 0.1087962962962963, + "validation_loss": 1.038762167096138, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_43.json b/metrics_epoch_43.json new file mode 100644 index 0000000000000000000000000000000000000000..0983e381f26a227505bb255e14e5fca9a767be14 --- /dev/null +++ b/metrics_epoch_43.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:04:44.721997", + "training_start_epoch": 0, + "training_epochs": 43, + "epoch": 43, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.14076573699712752, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.696944273226657, + "validation_acc": 0.6077835648148148, + "validation_no_result": 0.12644675925925927, + "validation_loss": 1.0652493784825008, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_44.json b/metrics_epoch_44.json new file mode 100644 index 0000000000000000000000000000000000000000..cf80d34cd5e1ef01e1afb047845627375f43f269 --- /dev/null +++ b/metrics_epoch_44.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:06:08.184296", + "training_start_epoch": 0, + "training_epochs": 44, + "epoch": 44, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.11980391334403645, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7046144983383263, + "validation_acc": 0.6247106481481481, + "validation_no_result": 0.11791087962962964, + "validation_loss": 1.061642122765382, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_45.json b/metrics_epoch_45.json new file mode 100644 index 0000000000000000000000000000000000000000..0746ba95bf2885a641de1a476721cb81045462c3 --- /dev/null +++ b/metrics_epoch_45.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 37, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:07:42.634039", + "training_start_epoch": 0, + "training_epochs": 45, + "epoch": 45, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.09614421298558062, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.704835063996358, + "validation_acc": 0.6129918981481481, + "validation_no_result": 0.11530671296296297, + "validation_loss": 1.112284041941166, + "best_validation_BLEU": 0.6769016467773605, + "best_validation_acc": 0.6247106481481481, + "best_validation_no_result": 0.1087962962962963, + "best_validation_loss": 0.9989128684004148 +} \ No newline at end of file diff --git a/metrics_epoch_46.json b/metrics_epoch_46.json new file mode 100644 index 0000000000000000000000000000000000000000..3a76d1e5d95a9fd22b4dcd4c44e2f896231ce6c4 --- /dev/null +++ b/metrics_epoch_46.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:09:16.409030", + "training_start_epoch": 0, + "training_epochs": 46, + "epoch": 46, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.08739731924777681, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6984141302081079, + "validation_acc": 0.6416377314814815, + "validation_no_result": 0.11530671296296297, + "validation_loss": 1.121234434346358, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_47.json b/metrics_epoch_47.json new file mode 100644 index 0000000000000000000000000000000000000000..2260a88025581ea89d3904e4c3ae49af9d3e0e4d --- /dev/null +++ b/metrics_epoch_47.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:10:55.662405", + "training_start_epoch": 0, + "training_epochs": 47, + "epoch": 47, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.08211925696920265, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6930361013969923, + "validation_acc": 0.634548611111111, + "validation_no_result": 0.10416666666666667, + "validation_loss": 1.133825662235419, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_48.json b/metrics_epoch_48.json new file mode 100644 index 0000000000000000000000000000000000000000..f12a0465ecdd1fca2c083151143d217ba78980af --- /dev/null +++ b/metrics_epoch_48.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:12:31.546018", + "training_start_epoch": 0, + "training_epochs": 48, + "epoch": 48, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.07778177600015293, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7073071684953178, + "validation_acc": 0.6312210648148148, + "validation_no_result": 0.12311921296296297, + "validation_loss": 1.1585969477891922, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_49.json b/metrics_epoch_49.json new file mode 100644 index 0000000000000000000000000000000000000000..c51312ef225f1400923943162177b427a46caf55 --- /dev/null +++ b/metrics_epoch_49.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:14:12.341546", + "training_start_epoch": 0, + "training_epochs": 49, + "epoch": 49, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.08109187040139329, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6964869334225392, + "validation_acc": 0.6325231481481481, + "validation_no_result": 0.1127025462962963, + "validation_loss": 1.1651760389407475, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_5.json b/metrics_epoch_5.json new file mode 100644 index 0000000000000000000000000000000000000000..823fce8e5047a971d3408509f3cce4d615501774 --- /dev/null +++ b/metrics_epoch_5.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 5, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:08:45.223627", + "training_start_epoch": 0, + "training_epochs": 5, + "epoch": 5, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.8448473995382135, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.3522097889512166, + "validation_acc": 0.44285300925925924, + "validation_no_result": 0.13151041666666666, + "validation_loss": 0.8661380161841711, + "best_validation_BLEU": 0.3522097889512166, + "best_validation_acc": 0.44285300925925924, + "best_validation_no_result": 0.13151041666666666, + "best_validation_loss": 0.8661380161841711 +} \ No newline at end of file diff --git a/metrics_epoch_50.json b/metrics_epoch_50.json new file mode 100644 index 0000000000000000000000000000000000000000..ce5ec6998643db0b8939fc7d2f4c522ebd24e44c --- /dev/null +++ b/metrics_epoch_50.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:15:50.787731", + "training_start_epoch": 0, + "training_epochs": 50, + "epoch": 50, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.07383564747869968, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7039476502305033, + "validation_acc": 0.6234085648148148, + "validation_no_result": 0.11140046296296297, + "validation_loss": 1.1804772640268009, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_51.json b/metrics_epoch_51.json new file mode 100644 index 0000000000000000000000000000000000000000..5f3576c1ec2f5683ad6f1e6749d563067ef617cf --- /dev/null +++ b/metrics_epoch_51.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:17:30.011877", + "training_start_epoch": 0, + "training_epochs": 51, + "epoch": 51, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.06791833307255399, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7000150706109356, + "validation_acc": 0.6338252314814815, + "validation_no_result": 0.11530671296296297, + "validation_loss": 1.1899462565779686, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_52.json b/metrics_epoch_52.json new file mode 100644 index 0000000000000000000000000000000000000000..56765991425b447ee92b9a966b99c6b35e0fd5db --- /dev/null +++ b/metrics_epoch_52.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:19:04.053589", + "training_start_epoch": 0, + "training_epochs": 52, + "epoch": 52, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.06623337922448462, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.6926042777657853, + "validation_acc": 0.6064814814814815, + "validation_no_result": 0.1205150462962963, + "validation_loss": 1.2020933479070663, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_53.json b/metrics_epoch_53.json new file mode 100644 index 0000000000000000000000000000000000000000..af5be9f641fd821e25f07c0264be6457208f8d03 --- /dev/null +++ b/metrics_epoch_53.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:20:28.089866", + "training_start_epoch": 0, + "training_epochs": 53, + "epoch": 53, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.05605946074832569, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7128349028777587, + "validation_acc": 0.6247106481481481, + "validation_no_result": 0.12311921296296297, + "validation_loss": 1.1966175511479378, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_54.json b/metrics_epoch_54.json new file mode 100644 index 0000000000000000000000000000000000000000..ececb369e3fa347d873ec31cc2fba793f360fc82 --- /dev/null +++ b/metrics_epoch_54.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:22:02.835606", + "training_start_epoch": 0, + "training_epochs": 54, + "epoch": 54, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.0485708721998063, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7074252651303048, + "validation_acc": 0.6273148148148148, + "validation_no_result": 0.11791087962962964, + "validation_loss": 1.226130726436774, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_55.json b/metrics_epoch_55.json new file mode 100644 index 0000000000000000000000000000000000000000..17d94e3b595a9060eaeb90fdac71647c4bccfdea --- /dev/null +++ b/metrics_epoch_55.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:23:37.218166", + "training_start_epoch": 0, + "training_epochs": 55, + "epoch": 55, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.04777379193427888, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7094565384091441, + "validation_acc": 0.6195023148148148, + "validation_no_result": 0.1166087962962963, + "validation_loss": 1.2262406672040622, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_56.json b/metrics_epoch_56.json new file mode 100644 index 0000000000000000000000000000000000000000..803cb0ed767feea1c3fff108831d04c423f8b511 --- /dev/null +++ b/metrics_epoch_56.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:25:10.710503", + "training_start_epoch": 0, + "training_epochs": 56, + "epoch": 56, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.04607348882339218, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7075218487475787, + "validation_acc": 0.634548611111111, + "validation_no_result": 0.1087962962962963, + "validation_loss": 1.247634395956993, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_57.json b/metrics_epoch_57.json new file mode 100644 index 0000000000000000000000000000000000000000..4e482a19ace855a2d2a2086c808b8e41fca154b6 --- /dev/null +++ b/metrics_epoch_57.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:26:59.135881", + "training_start_epoch": 0, + "training_epochs": 57, + "epoch": 57, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.043339876017787236, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7141783708143784, + "validation_acc": 0.6208043981481481, + "validation_no_result": 0.1244212962962963, + "validation_loss": 1.245709516108036, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_58.json b/metrics_epoch_58.json new file mode 100644 index 0000000000000000000000000000000000000000..f55dabbf28f43c7c01c8466e80d3c9512a35f583 --- /dev/null +++ b/metrics_epoch_58.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:28:48.198869", + "training_start_epoch": 0, + "training_epochs": 58, + "epoch": 58, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.04280694399706342, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7229747151247405, + "validation_acc": 0.6260127314814815, + "validation_no_result": 0.12572337962962962, + "validation_loss": 1.273095856110255, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_59.json b/metrics_epoch_59.json new file mode 100644 index 0000000000000000000000000000000000000000..be8492708c5802377fcb09a0ceb0d6b25ad187e6 --- /dev/null +++ b/metrics_epoch_59.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:30:38.972107", + "training_start_epoch": 0, + "training_epochs": 59, + "epoch": 59, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.038086464679376646, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.72395258707303, + "validation_acc": 0.6273148148148148, + "validation_no_result": 0.12572337962962962, + "validation_loss": 1.263894572854042, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_6.json b/metrics_epoch_6.json new file mode 100644 index 0000000000000000000000000000000000000000..4cbac41dfc0f798259be16022b38433341954641 --- /dev/null +++ b/metrics_epoch_6.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 6, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:10:08.412326", + "training_start_epoch": 0, + "training_epochs": 6, + "epoch": 6, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.8050483654845845, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.348359876044927, + "validation_acc": 0.44429976851851855, + "validation_no_result": 0.13020833333333334, + "validation_loss": 0.8393951679269472, + "best_validation_BLEU": 0.348359876044927, + "best_validation_acc": 0.44429976851851855, + "best_validation_no_result": 0.13020833333333334, + "best_validation_loss": 0.8393951679269472 +} \ No newline at end of file diff --git a/metrics_epoch_60.json b/metrics_epoch_60.json new file mode 100644 index 0000000000000000000000000000000000000000..b04d106d8335521a63897c83df29afdcf24f50c5 --- /dev/null +++ b/metrics_epoch_60.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:32:28.819014", + "training_start_epoch": 0, + "training_epochs": 60, + "epoch": 60, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.0358703440901908, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7278559978588297, + "validation_acc": 0.6351273148148148, + "validation_no_result": 0.12311921296296297, + "validation_loss": 1.268241671224435, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_61.json b/metrics_epoch_61.json new file mode 100644 index 0000000000000000000000000000000000000000..164405d096b0436b66f4717af4673d1933523409 --- /dev/null +++ b/metrics_epoch_61.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:34:18.148783", + "training_start_epoch": 0, + "training_epochs": 61, + "epoch": 61, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.03487256574021144, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7200914487496768, + "validation_acc": 0.6397569444444444, + "validation_no_result": 0.11530671296296297, + "validation_loss": 1.2846094022194545, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_62.json b/metrics_epoch_62.json new file mode 100644 index 0000000000000000000000000000000000000000..29415e02672e4f716cdf1be0fc77faec5a7b1c5c --- /dev/null +++ b/metrics_epoch_62.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:36:06.579016", + "training_start_epoch": 0, + "training_epochs": 62, + "epoch": 62, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.034294218273664065, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7241165720152878, + "validation_acc": 0.6168981481481481, + "validation_no_result": 0.12181712962962964, + "validation_loss": 1.2769825334350269, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_63.json b/metrics_epoch_63.json new file mode 100644 index 0000000000000000000000000000000000000000..ecb2eec8a0cda724aa28921bb74d2da5b86ebf98 --- /dev/null +++ b/metrics_epoch_63.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 46, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:37:56.336034", + "training_start_epoch": 0, + "training_epochs": 63, + "epoch": 63, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.03228020341220227, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7154226129528386, + "validation_acc": 0.6325231481481481, + "validation_no_result": 0.12572337962962962, + "validation_loss": 1.2909877225756645, + "best_validation_BLEU": 0.6984141302081079, + "best_validation_acc": 0.6416377314814815, + "best_validation_no_result": 0.11530671296296297, + "best_validation_loss": 1.121234434346358 +} \ No newline at end of file diff --git a/metrics_epoch_64.json b/metrics_epoch_64.json new file mode 100644 index 0000000000000000000000000000000000000000..6401fb264ec49527f9a0308c00afe60361310cc6 --- /dev/null +++ b/metrics_epoch_64.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 64, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:39:46.223809", + "training_start_epoch": 0, + "training_epochs": 64, + "epoch": 64, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.0319976161996072, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7061484056297785, + "validation_acc": 0.6449652777777778, + "validation_no_result": 0.10416666666666667, + "validation_loss": 1.2931284854809444, + "best_validation_BLEU": 0.7061484056297785, + "best_validation_acc": 0.6449652777777778, + "best_validation_no_result": 0.10416666666666667, + "best_validation_loss": 1.2931284854809444 +} \ No newline at end of file diff --git a/metrics_epoch_65.json b/metrics_epoch_65.json new file mode 100644 index 0000000000000000000000000000000000000000..34cba7ef05dc499e607bc72d3c0cc7bdf908e5da --- /dev/null +++ b/metrics_epoch_65.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:42:00.346761", + "training_start_epoch": 0, + "training_epochs": 65, + "epoch": 65, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.030482412044974892, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7229897065328674, + "validation_acc": 0.650173611111111, + "validation_no_result": 0.11921296296296297, + "validation_loss": 1.3042563870549202, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_66.json b/metrics_epoch_66.json new file mode 100644 index 0000000000000000000000000000000000000000..6d2432d4771ff44cb94c889055758623dffafac2 --- /dev/null +++ b/metrics_epoch_66.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:44:12.486951", + "training_start_epoch": 0, + "training_epochs": 66, + "epoch": 66, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.02929827385497364, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7132230420457545, + "validation_acc": 0.6364293981481481, + "validation_no_result": 0.12181712962962964, + "validation_loss": 1.3180407658219337, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_67.json b/metrics_epoch_67.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5779cf261f0b6f3caeca3f91e965d134c2660c --- /dev/null +++ b/metrics_epoch_67.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:46:00.645473", + "training_start_epoch": 0, + "training_epochs": 67, + "epoch": 67, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.029336183111776003, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7220749180453158, + "validation_acc": 0.6338252314814815, + "validation_no_result": 0.11791087962962964, + "validation_loss": 1.3172537262241046, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_68.json b/metrics_epoch_68.json new file mode 100644 index 0000000000000000000000000000000000000000..7511614a7e76fcab65e95d98fb04d8194dae9e46 --- /dev/null +++ b/metrics_epoch_68.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:48:07.763030", + "training_start_epoch": 0, + "training_epochs": 68, + "epoch": 68, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.028704730751500887, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7160570303153385, + "validation_acc": 0.630642361111111, + "validation_no_result": 0.12311921296296297, + "validation_loss": 1.3356067761778831, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_69.json b/metrics_epoch_69.json new file mode 100644 index 0000000000000000000000000000000000000000..799dd891fbd8ddd8a6109f52f0ef227fffcf421a --- /dev/null +++ b/metrics_epoch_69.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:49:56.819043", + "training_start_epoch": 0, + "training_epochs": 69, + "epoch": 69, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.027822929832407018, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7234562018857141, + "validation_acc": 0.6403356481481481, + "validation_no_result": 0.11400462962962964, + "validation_loss": 1.3301840697725613, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_7.json b/metrics_epoch_7.json new file mode 100644 index 0000000000000000000000000000000000000000..a18479c5a26b2628070d419690a615bf12bc9853 --- /dev/null +++ b/metrics_epoch_7.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 7, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:11:31.753036", + "training_start_epoch": 0, + "training_epochs": 7, + "epoch": 7, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.7711158113046126, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.37835165992261416, + "validation_acc": 0.45847800925925924, + "validation_no_result": 0.13483796296296297, + "validation_loss": 0.8319797093669573, + "best_validation_BLEU": 0.37835165992261416, + "best_validation_acc": 0.45847800925925924, + "best_validation_no_result": 0.13483796296296297, + "best_validation_loss": 0.8319797093669573 +} \ No newline at end of file diff --git a/metrics_epoch_70.json b/metrics_epoch_70.json new file mode 100644 index 0000000000000000000000000000000000000000..6a03ecaabee25329a4676f55d8d83b3d57f719a7 --- /dev/null +++ b/metrics_epoch_70.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:51:46.840089", + "training_start_epoch": 0, + "training_epochs": 70, + "epoch": 70, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.02637631700966846, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7172205941953215, + "validation_acc": 0.6442418981481481, + "validation_no_result": 0.1087962962962963, + "validation_loss": 1.3317091191808383, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_71.json b/metrics_epoch_71.json new file mode 100644 index 0000000000000000000000000000000000000000..da908f82607072cd4fc0546e569f243b08016c56 --- /dev/null +++ b/metrics_epoch_71.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:53:36.001050", + "training_start_epoch": 0, + "training_epochs": 71, + "epoch": 71, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.025984370251270858, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7123290193861502, + "validation_acc": 0.6416377314814815, + "validation_no_result": 0.11791087962962964, + "validation_loss": 1.3444155653317769, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_72.json b/metrics_epoch_72.json new file mode 100644 index 0000000000000000000000000000000000000000..35eb7cd7b3ad992796cfb82fdfe9fce035c84891 --- /dev/null +++ b/metrics_epoch_72.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:55:23.144361", + "training_start_epoch": 0, + "training_epochs": 72, + "epoch": 72, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.024248589245094494, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7243629763995799, + "validation_acc": 0.638454861111111, + "validation_no_result": 0.12109375, + "validation_loss": 1.353646678229173, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_73.json b/metrics_epoch_73.json new file mode 100644 index 0000000000000000000000000000000000000000..8806ba150c7294107612dea24b3e2262b528a358 --- /dev/null +++ b/metrics_epoch_73.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:57:17.121575", + "training_start_epoch": 0, + "training_epochs": 73, + "epoch": 73, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.02305317973210053, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7256423519179254, + "validation_acc": 0.650173611111111, + "validation_no_result": 0.11458333333333333, + "validation_loss": 1.3602836256225903, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_74.json b/metrics_epoch_74.json new file mode 100644 index 0000000000000000000000000000000000000000..ecb77865eec387146a15e10dcf025fb508443f4b --- /dev/null +++ b/metrics_epoch_74.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "1:59:09.371887", + "training_start_epoch": 0, + "training_epochs": 74, + "epoch": 74, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.0228921974839812, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7241301306811996, + "validation_acc": 0.6436631944444444, + "validation_no_result": 0.11848958333333333, + "validation_loss": 1.3589155599474907, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_75.json b/metrics_epoch_75.json new file mode 100644 index 0000000000000000000000000000000000000000..cb3ca05fefe69defb784d0ffb6690891843475ae --- /dev/null +++ b/metrics_epoch_75.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:00:56.945114", + "training_start_epoch": 0, + "training_epochs": 75, + "epoch": 75, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.02282776189121333, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7240887798169782, + "validation_acc": 0.6325231481481481, + "validation_no_result": 0.12962962962962962, + "validation_loss": 1.359854633609454, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_76.json b/metrics_epoch_76.json new file mode 100644 index 0000000000000000000000000000000000000000..14178d843052480bed68a6934a528b1cd00cd4b8 --- /dev/null +++ b/metrics_epoch_76.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:02:45.072033", + "training_start_epoch": 0, + "training_epochs": 76, + "epoch": 76, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.021604217978363687, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7266012365004841, + "validation_acc": 0.6495949074074074, + "validation_no_result": 0.11979166666666667, + "validation_loss": 1.364431341489156, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_77.json b/metrics_epoch_77.json new file mode 100644 index 0000000000000000000000000000000000000000..1c5e6ab887179845e3d4e5e3e6f63a609661df34 --- /dev/null +++ b/metrics_epoch_77.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:04:34.711722", + "training_start_epoch": 0, + "training_epochs": 77, + "epoch": 77, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.01969964803111824, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7210583397435678, + "validation_acc": 0.6482928240740741, + "validation_no_result": 0.11979166666666667, + "validation_loss": 1.365205739935239, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_78.json b/metrics_epoch_78.json new file mode 100644 index 0000000000000000000000000000000000000000..cce0dad25889c33b93e1b0dfbda553f9b23884dd --- /dev/null +++ b/metrics_epoch_78.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:06:23.960886", + "training_start_epoch": 0, + "training_epochs": 78, + "epoch": 78, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.02024272994815626, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.724350251415758, + "validation_acc": 0.6391782407407408, + "validation_no_result": 0.11979166666666667, + "validation_loss": 1.3815037235617638, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_79.json b/metrics_epoch_79.json new file mode 100644 index 0000000000000000000000000000000000000000..662042e9d4f8854050f8afc5408bf615dafef32b --- /dev/null +++ b/metrics_epoch_79.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 65, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:08:11.677141", + "training_start_epoch": 0, + "training_epochs": 79, + "epoch": 79, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.02087932777252387, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7269796234555643, + "validation_acc": 0.6495949074074074, + "validation_no_result": 0.11328125, + "validation_loss": 1.376489485303561, + "best_validation_BLEU": 0.7229897065328674, + "best_validation_acc": 0.650173611111111, + "best_validation_no_result": 0.11921296296296297, + "best_validation_loss": 1.3042563870549202 +} \ No newline at end of file diff --git a/metrics_epoch_8.json b/metrics_epoch_8.json new file mode 100644 index 0000000000000000000000000000000000000000..88c2badcfc13c6981487926d4141accfb80e927d --- /dev/null +++ b/metrics_epoch_8.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 8, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:12:54.325766", + "training_start_epoch": 0, + "training_epochs": 8, + "epoch": 8, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.7373076503927057, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.38077241418338464, + "validation_acc": 0.48321759259259256, + "validation_no_result": 0.11140046296296297, + "validation_loss": 0.7973246946930885, + "best_validation_BLEU": 0.38077241418338464, + "best_validation_acc": 0.48321759259259256, + "best_validation_no_result": 0.11140046296296297, + "best_validation_loss": 0.7973246946930885 +} \ No newline at end of file diff --git a/metrics_epoch_80.json b/metrics_epoch_80.json new file mode 100644 index 0000000000000000000000000000000000000000..506b375aed17b1041cab761d4bb2dd4e78edd197 --- /dev/null +++ b/metrics_epoch_80.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:10:07.761881", + "training_start_epoch": 0, + "training_epochs": 80, + "epoch": 80, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.019872400206937032, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7245620136488832, + "validation_acc": 0.6508969907407408, + "validation_no_result": 0.11067708333333333, + "validation_loss": 1.3817762682835262, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_81.json b/metrics_epoch_81.json new file mode 100644 index 0000000000000000000000000000000000000000..882753082e288675b709270335dbda80e5e72e5b --- /dev/null +++ b/metrics_epoch_81.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:12:14.064481", + "training_start_epoch": 0, + "training_epochs": 81, + "epoch": 81, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.017521460680291055, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7238012700329899, + "validation_acc": 0.6443865740740741, + "validation_no_result": 0.11588541666666667, + "validation_loss": 1.3830252140760422, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_82.json b/metrics_epoch_82.json new file mode 100644 index 0000000000000000000000000000000000000000..4b1fcec2e3b680ec560318ec9f93bf7db3abe3b7 --- /dev/null +++ b/metrics_epoch_82.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:14:00.992742", + "training_start_epoch": 0, + "training_epochs": 82, + "epoch": 82, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.018971136305481195, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7267779633348508, + "validation_acc": 0.6495949074074074, + "validation_no_result": 0.12369791666666667, + "validation_loss": 1.3803121149539948, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_83.json b/metrics_epoch_83.json new file mode 100644 index 0000000000000000000000000000000000000000..822d40908d984794299c9dc836a48e43d69bee9a --- /dev/null +++ b/metrics_epoch_83.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:15:54.119298", + "training_start_epoch": 0, + "training_epochs": 83, + "epoch": 83, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.018187626988881014, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7325209776549473, + "validation_acc": 0.6508969907407408, + "validation_no_result": 0.11328125, + "validation_loss": 1.388483499487241, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_84.json b/metrics_epoch_84.json new file mode 100644 index 0000000000000000000000000000000000000000..c4837fe2eba8a70af0b64acb7a4be08eb8a8b614 --- /dev/null +++ b/metrics_epoch_84.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:17:43.139008", + "training_start_epoch": 0, + "training_epochs": 84, + "epoch": 84, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.018349176162684507, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7256516352666668, + "validation_acc": 0.6397569444444444, + "validation_no_result": 0.11197916666666667, + "validation_loss": 1.3886348779002826, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_85.json b/metrics_epoch_85.json new file mode 100644 index 0000000000000000000000000000000000000000..76373fe4e4730b4dce8b8269e211fd26d5d4d126 --- /dev/null +++ b/metrics_epoch_85.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:19:35.530588", + "training_start_epoch": 0, + "training_epochs": 85, + "epoch": 85, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.018646015739068388, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7261286762309054, + "validation_acc": 0.6404803240740741, + "validation_no_result": 0.11067708333333333, + "validation_loss": 1.3930509214599927, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_86.json b/metrics_epoch_86.json new file mode 100644 index 0000000000000000000000000000000000000000..a68a812a285ffcd27cf6c69bb49dc5716486213b --- /dev/null +++ b/metrics_epoch_86.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:21:31.794157", + "training_start_epoch": 0, + "training_epochs": 86, + "epoch": 86, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.017673088898035615, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7269483961768961, + "validation_acc": 0.6443865740740741, + "validation_no_result": 0.11458333333333333, + "validation_loss": 1.3963286578655243, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_87.json b/metrics_epoch_87.json new file mode 100644 index 0000000000000000000000000000000000000000..b8a310ea0ed3ffadd2b4235c00fddbcbaf621c9a --- /dev/null +++ b/metrics_epoch_87.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:23:18.744182", + "training_start_epoch": 0, + "training_epochs": 87, + "epoch": 87, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.016832234435291454, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7265608515756667, + "validation_acc": 0.6469907407407408, + "validation_no_result": 0.11197916666666667, + "validation_loss": 1.397053025662899, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_88.json b/metrics_epoch_88.json new file mode 100644 index 0000000000000000000000000000000000000000..1a943fd8f6a1a4db9b0d7cda1aa70603927f1eb6 --- /dev/null +++ b/metrics_epoch_88.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:25:07.031501", + "training_start_epoch": 0, + "training_epochs": 88, + "epoch": 88, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.016711718893863938, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.724987463449164, + "validation_acc": 0.6443865740740741, + "validation_no_result": 0.11588541666666667, + "validation_loss": 1.3939060419797897, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_89.json b/metrics_epoch_89.json new file mode 100644 index 0000000000000000000000000000000000000000..165e02c8e6299c469ccc10e31eba6f102b2dd7f1 --- /dev/null +++ b/metrics_epoch_89.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:26:55.180809", + "training_start_epoch": 0, + "training_epochs": 89, + "epoch": 89, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.01651661289771172, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7226806914262347, + "validation_acc": 0.6352719907407408, + "validation_no_result": 0.1171875, + "validation_loss": 1.4012814511855443, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_9.json b/metrics_epoch_9.json new file mode 100644 index 0000000000000000000000000000000000000000..c2cd9af4758482583afa788f7249c4b5836a6347 --- /dev/null +++ b/metrics_epoch_9.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 8, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17660, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "0:14:18.475761", + "training_start_epoch": 0, + "training_epochs": 9, + "epoch": 9, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.7182401624592868, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17660, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.35268137885590434, + "validation_acc": 0.46440972222222227, + "validation_no_result": 0.11530671296296297, + "validation_loss": 0.8008881782492002, + "best_validation_BLEU": 0.38077241418338464, + "best_validation_acc": 0.48321759259259256, + "best_validation_no_result": 0.11140046296296297, + "best_validation_loss": 0.7973246946930885 +} \ No newline at end of file diff --git a/metrics_epoch_90.json b/metrics_epoch_90.json new file mode 100644 index 0000000000000000000000000000000000000000..0249de0ccdb6cbbd9966a87c24090a914fe9196c --- /dev/null +++ b/metrics_epoch_90.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:28:42.595687", + "training_start_epoch": 0, + "training_epochs": 90, + "epoch": 90, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.01537049892358482, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.727213295887757, + "validation_acc": 0.6417824074074074, + "validation_no_result": 0.11848958333333333, + "validation_loss": 1.4017266556620598, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_91.json b/metrics_epoch_91.json new file mode 100644 index 0000000000000000000000000000000000000000..978e10a723d10d925f4f96ea90c34c0711fea504 --- /dev/null +++ b/metrics_epoch_91.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:30:29.772405", + "training_start_epoch": 0, + "training_epochs": 91, + "epoch": 91, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.01567524956031279, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7263786661247342, + "validation_acc": 0.6378761574074074, + "validation_no_result": 0.1171875, + "validation_loss": 1.4088865220546722, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_92.json b/metrics_epoch_92.json new file mode 100644 index 0000000000000000000000000000000000000000..d475ce13aa32f7636125270e0ee289adca491517 --- /dev/null +++ b/metrics_epoch_92.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:32:16.238885", + "training_start_epoch": 0, + "training_epochs": 92, + "epoch": 92, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.015533217218365859, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7285192584342107, + "validation_acc": 0.6332465277777778, + "validation_no_result": 0.12572337962962962, + "validation_loss": 1.4104147876302402, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_93.json b/metrics_epoch_93.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb60e747cbeedef5b4a10d35e1fd0046631e88a --- /dev/null +++ b/metrics_epoch_93.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:34:06.867258", + "training_start_epoch": 0, + "training_epochs": 93, + "epoch": 93, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.015072003692727197, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7285539443710713, + "validation_acc": 0.6365740740740741, + "validation_no_result": 0.12239583333333333, + "validation_loss": 1.4112620949745178, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_94.json b/metrics_epoch_94.json new file mode 100644 index 0000000000000000000000000000000000000000..ce1612ab2b25d13d29209486a3b9f8c3aea434e2 --- /dev/null +++ b/metrics_epoch_94.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:35:56.259754", + "training_start_epoch": 0, + "training_epochs": 94, + "epoch": 94, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.014531611329452559, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7277052682532325, + "validation_acc": 0.6365740740740741, + "validation_no_result": 0.11848958333333333, + "validation_loss": 1.4106633414824803, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_95.json b/metrics_epoch_95.json new file mode 100644 index 0000000000000000000000000000000000000000..657d2d0091fbd0e0b3af0fc76b603f2a8c55fee4 --- /dev/null +++ b/metrics_epoch_95.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:37:45.282153", + "training_start_epoch": 0, + "training_epochs": 95, + "epoch": 95, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.014618165079842914, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7266067850908157, + "validation_acc": 0.6391782407407408, + "validation_no_result": 0.12239583333333333, + "validation_loss": 1.411758229136467, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_96.json b/metrics_epoch_96.json new file mode 100644 index 0000000000000000000000000000000000000000..8b304064b332cac65296663e25a3d8ef27ca246e --- /dev/null +++ b/metrics_epoch_96.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:39:35.233646", + "training_start_epoch": 0, + "training_epochs": 96, + "epoch": 96, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.015245335367084905, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7297971727977184, + "validation_acc": 0.6404803240740741, + "validation_no_result": 0.1171875, + "validation_loss": 1.416468304892381, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_97.json b/metrics_epoch_97.json new file mode 100644 index 0000000000000000000000000000000000000000..0938618be5b0782ee645ab1d26c3dc0dc20b2f5b --- /dev/null +++ b/metrics_epoch_97.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:41:29.533201", + "training_start_epoch": 0, + "training_epochs": 97, + "epoch": 97, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.014791254534132102, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7301994285895989, + "validation_acc": 0.6404803240740741, + "validation_no_result": 0.11588541666666667, + "validation_loss": 1.4220996722579002, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_98.json b/metrics_epoch_98.json new file mode 100644 index 0000000000000000000000000000000000000000..06f768ccdaa6d5cc6e48361815b1964310c284b8 --- /dev/null +++ b/metrics_epoch_98.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:43:13.505625", + "training_start_epoch": 0, + "training_epochs": 98, + "epoch": 98, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.015271843813190406, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7290488167601582, + "validation_acc": 0.638454861111111, + "validation_no_result": 0.11921296296296297, + "validation_loss": 1.4180005093415577, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/metrics_epoch_99.json b/metrics_epoch_99.json new file mode 100644 index 0000000000000000000000000000000000000000..8b30e75c4c9c194d6135ad04470ef6967d768c65 --- /dev/null +++ b/metrics_epoch_99.json @@ -0,0 +1,34 @@ +{ + "best_epoch": 80, + "peak_cpu_memory_MB": 4772.668, + "peak_gpu_0_memory_MB": 5, + "peak_gpu_1_memory_MB": 5, + "peak_gpu_2_memory_MB": 5, + "peak_gpu_3_memory_MB": 5, + "peak_gpu_4_memory_MB": 17662, + "peak_gpu_5_memory_MB": 5, + "peak_gpu_6_memory_MB": 5, + "training_duration": "2:45:01.064397", + "training_start_epoch": 0, + "training_epochs": 99, + "epoch": 99, + "training_acc": 0, + "training_no_result": 0, + "training_loss": 0.013839077864858237, + "training_cpu_memory_MB": 4772.668, + "training_gpu_0_memory_MB": 5, + "training_gpu_1_memory_MB": 5, + "training_gpu_2_memory_MB": 5, + "training_gpu_3_memory_MB": 5, + "training_gpu_4_memory_MB": 17662, + "training_gpu_5_memory_MB": 5, + "training_gpu_6_memory_MB": 5, + "validation_BLEU": 0.7266542542114865, + "validation_acc": 0.6417824074074074, + "validation_no_result": 0.11848958333333333, + "validation_loss": 1.416664329667886, + "best_validation_BLEU": 0.7245620136488832, + "best_validation_acc": 0.6508969907407408, + "best_validation_no_result": 0.11067708333333333, + "best_validation_loss": 1.3817762682835262 +} \ No newline at end of file diff --git a/stderr.log b/stderr.log new file mode 100644 index 0000000000000000000000000000000000000000..5a2036b4fac6e6a58185602a2414b55d324d6f1f --- /dev/null +++ b/stderr.log @@ -0,0 +1,14089 @@ +0it [00:00, ?it/s] +1it [00:00, 1.61it/s] +94it [00:00, 174.95it/s] +198it [00:00, 356.41it/s] +275it [00:01, 354.59it/s] +379it [00:01, 495.35it/s] +489it [00:01, 631.21it/s] +593it [00:01, 730.42it/s] +690it [00:01, 791.65it/s] +796it [00:01, 862.25it/s] +904it [00:01, 922.64it/s] +1005it [00:01, 944.22it/s] +1111it [00:01, 977.44it/s] +1214it [00:02, 718.60it/s] +1318it [00:02, 792.41it/s] +1425it [00:02, 859.05it/s] +1531it [00:02, 908.72it/s] +1633it [00:02, 935.83it/s] +1737it [00:02, 963.82it/s] +1840it [00:02, 981.89it/s] +1945it [00:02, 999.55it/s] +2047it [00:03, 695.24it/s] +2147it [00:03, 761.38it/s] +2248it [00:03, 819.06it/s] +2353it [00:03, 877.41it/s] +2457it [00:03, 920.62it/s] +2560it [00:03, 949.07it/s] +2667it [00:03, 982.00it/s] +2769it [00:03, 989.11it/s] +2874it [00:03, 1006.73it/s] +2979it [00:03, 1018.50it/s] +3083it [00:04, 650.72it/s] +3188it [00:04, 734.24it/s] +3290it [00:04, 799.30it/s] +3398it [00:04, 867.93it/s] +3499it [00:04, 753.19it/s] + +0it [00:00, ?it/s] +1it [00:00, 4.89it/s] +120it [00:00, 491.20it/s] +226it [00:00, 699.58it/s] +329it [00:00, 811.20it/s] +434it [00:00, 889.14it/s] +535it [00:00, 927.37it/s] +640it [00:00, 963.25it/s] +745it [00:00, 820.29it/s] + +0it [00:00, ?it/s] +1it [00:00, 5.21it/s] +28it [00:00, 77.14it/s] +130it [00:00, 345.36it/s] +232it [00:00, 539.45it/s] +336it [00:00, 682.78it/s] +440it [00:00, 786.56it/s] +542it [00:00, 855.06it/s] +650it [00:01, 918.31it/s] +754it [00:01, 953.67it/s] +754it [00:01, 671.37it/s] + +0it [00:00, ?it/s] +4998it [00:00, 62045.70it/s] + + 0%| | 0/110 [00:00 from params {'source_token_indexer': {'tokens': {'do_lowercase': False, 'model_name': './roberta', 'type': 'pretrained_transformer'}}, 'target_token_indexer': {'tokens': {'type': 'single_id'}}, 'tokenizer': {'word_splitter': {'type': 'just_spaces'}}, 'type': 's2s_manual_reader'} and extras set() +2025-03-24 13:21:27,584 - INFO - allennlp.common.params - dataset_reader.type = s2s_manual_reader +2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class from params {'source_token_indexer': {'tokens': {'do_lowercase': False, 'model_name': './roberta', 'type': 'pretrained_transformer'}}, 'target_token_indexer': {'tokens': {'type': 'single_id'}}, 'tokenizer': {'word_splitter': {'type': 'just_spaces'}}} and extras set() +2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class from params {'word_splitter': {'type': 'just_spaces'}} and extras set() +2025-03-24 13:21:27,585 - INFO - allennlp.common.params - dataset_reader.tokenizer.type = word +2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class from params {'word_splitter': {'type': 'just_spaces'}} and extras set() +2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class from params {'type': 'just_spaces'} and extras set() +2025-03-24 13:21:27,585 - INFO - allennlp.common.params - dataset_reader.tokenizer.word_splitter.type = just_spaces +2025-03-24 13:21:27,585 - INFO - allennlp.common.from_params - instantiating class from params {} and extras set() +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.tokenizer.start_tokens = None +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.tokenizer.end_tokens = None +2025-03-24 13:21:27,586 - INFO - allennlp.common.from_params - instantiating class from params {'do_lowercase': False, 'model_name': './roberta', 'type': 'pretrained_transformer'} and extras set() +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.type = pretrained_transformer +2025-03-24 13:21:27,586 - INFO - allennlp.common.from_params - instantiating class from params {'do_lowercase': False, 'model_name': './roberta'} and extras set() +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.model_name = ./roberta +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.do_lowercase = False +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.namespace = tags +2025-03-24 13:21:27,586 - INFO - allennlp.common.params - dataset_reader.source_token_indexer.tokens.token_min_padding_length = 0 +2025-03-24 13:21:27,586 - INFO - pytorch_transformers.tokenization_utils - Model name './roberta' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming './roberta' is a path or url to a directory containing tokenizer files. +2025-03-24 13:21:27,588 - INFO - pytorch_transformers.tokenization_utils - loading file ./roberta/vocab.txt +2025-03-24 13:21:27,588 - INFO - pytorch_transformers.tokenization_utils - loading file ./roberta/added_tokens.json +2025-03-24 13:21:27,588 - INFO - pytorch_transformers.tokenization_utils - loading file ./roberta/special_tokens_map.json +2025-03-24 13:21:27,611 - INFO - allennlp.data.token_indexers.pretrained_transformer_indexer - Using token indexer padding value of 0 +2025-03-24 13:21:27,611 - INFO - allennlp.common.from_params - instantiating class from params {'type': 'single_id'} and extras set() +2025-03-24 13:21:27,611 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.type = single_id +2025-03-24 13:21:27,611 - INFO - allennlp.common.from_params - instantiating class from params {} and extras set() +2025-03-24 13:21:27,611 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.namespace = tokens +2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.lowercase_tokens = False +2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.start_tokens = None +2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.end_tokens = None +2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.target_token_indexer.tokens.token_min_padding_length = 0 +2025-03-24 13:21:27,612 - INFO - allennlp.common.params - dataset_reader.model_name = None +2025-03-24 13:21:27,613 - INFO - allennlp.common.params - train_data_path = ./GeoQA-Data/GeoQA-Pro/pro_train.pk +2025-03-24 13:21:27,613 - INFO - allennlp.training.util - Reading training data from ./GeoQA-Data/GeoQA-Pro/pro_train.pk +2025-03-24 13:21:32,260 - INFO - allennlp.common.params - validation_data_path = ./GeoQA-Data/GeoQA-Pro/pro_dev.pk +2025-03-24 13:21:32,261 - INFO - allennlp.training.util - Reading validation data from ./GeoQA-Data/GeoQA-Pro/pro_dev.pk +2025-03-24 13:21:33,170 - INFO - allennlp.common.params - test_data_path = ./GeoQA-Data/GeoQA-Pro/pro_test.pk +2025-03-24 13:21:33,170 - INFO - allennlp.training.util - Reading test data from ./GeoQA-Data/GeoQA-Pro/pro_test.pk +2025-03-24 13:21:34,300 - INFO - allennlp.training.trainer_pieces - From dataset instances, validation, test, train will be considered for vocabulary creation. +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.type = None +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.extend = False +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.directory_path = None +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.min_count = None +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels') +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.pretrained_files = {} +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False +2025-03-24 13:21:34,301 - INFO - allennlp.common.params - vocabulary.tokens_to_add = None +2025-03-24 13:21:34,301 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset. +2025-03-24 13:21:34,382 - INFO - allennlp.common.from_params - instantiating class from params {'beam_size': 10, 'encoder': {'dropout': 0.5, 'emb_dim': 768, 'hid_dim': 512, 'input_dim': 21128}, 'knowledge_points_ratio': 0, 'max_decoding_steps': 16, 'resnet_pretrained': './', 'scheduled_sampling_ratio': 0, 'source_embedder': {'token_embedders': {}}, 'target_embedding_dim': 512, 'type': 'geo_s2s'} and extras {'vocab'} +2025-03-24 13:21:34,382 - INFO - allennlp.common.params - model.type = geo_s2s +2025-03-24 13:21:34,382 - INFO - allennlp.common.from_params - instantiating class from params {'beam_size': 10, 'encoder': {'dropout': 0.5, 'emb_dim': 768, 'hid_dim': 512, 'input_dim': 21128}, 'knowledge_points_ratio': 0, 'max_decoding_steps': 16, 'resnet_pretrained': './', 'scheduled_sampling_ratio': 0, 'source_embedder': {'token_embedders': {}}, 'target_embedding_dim': 512} and extras {'vocab'} +2025-03-24 13:21:34,383 - INFO - allennlp.common.from_params - instantiating class from params {'token_embedders': {}} and extras {'vocab'} +2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.source_embedder.type = basic +2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.source_embedder.embedder_to_indexer_map = None +2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.source_embedder.allow_unmatched_keys = False +2025-03-24 13:21:34,383 - INFO - allennlp.common.from_params - instantiating class from params {'dropout': 0.5, 'emb_dim': 768, 'hid_dim': 512, 'input_dim': 21128} and extras {'vocab'} +2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.encoder.input_dim = 21128 +2025-03-24 13:21:34,383 - INFO - allennlp.common.params - model.encoder.emb_dim = 768 +2025-03-24 13:21:34,384 - INFO - allennlp.common.params - model.encoder.hid_dim = 512 +2025-03-24 13:21:34,384 - INFO - allennlp.common.params - model.encoder.dropout = 0.5 +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.max_decoding_steps = 16 +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.knowledge_points_ratio = 0 +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.beam_size = 10 +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.target_namespace = tokens +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.target_embedding_dim = 512 +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.scheduled_sampling_ratio = 0 +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.resnet_pretrained = ./ +2025-03-24 13:21:36,482 - INFO - allennlp.common.params - model.use_bleu = True +2025-03-24 13:21:39,512 - INFO - root - Loading a model trained before embedding extension was implemented; pass an explicit vocab namespace if you want to extend the vocabulary. +2025-03-24 13:21:39,662 - INFO - allennlp.common.from_params - instantiating class from params {'batch_size': 32, 'type': 'basic'} and extras set() +2025-03-24 13:21:39,662 - INFO - allennlp.common.params - iterator.type = basic +2025-03-24 13:21:39,662 - INFO - allennlp.common.from_params - instantiating class from params {'batch_size': 32} and extras set() +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.batch_size = 32 +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.instances_per_epoch = None +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.max_instances_in_memory = None +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.cache_instances = False +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.track_epoch = False +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - iterator.maximum_samples_per_batch = None +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - validation_iterator = None +2025-03-24 13:21:39,663 - INFO - allennlp.common.params - trainer.no_grad = () +2025-03-24 13:21:39,671 - INFO - allennlp.training.trainer_pieces - Following parameters are Frozen (without gradient): +2025-03-24 13:21:39,671 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.position_embeddings +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_pos_embed +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - Following parameters are Tunable (with gradient): +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - channel_transform.weight +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - channel_transform.bias +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_v.weight +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_v.bias +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_k.weight +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_k.bias +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_q.weight +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_q.bias +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_merge.weight +2025-03-24 13:21:39,672 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.mhatt.linear_merge.bias +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.linear.weight +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.ffn.mlp.linear.bias +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm1.a_2 +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm1.b_2 +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm2.a_2 +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.0.norm2.b_2 +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_v.weight +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_v.bias +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_k.weight +2025-03-24 13:21:39,673 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_k.bias +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_q.weight +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_q.bias +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_merge.weight +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.mhatt.linear_merge.bias +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.linear.weight +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.ffn.mlp.linear.bias +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm1.a_2 +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm1.b_2 +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm2.a_2 +2025-03-24 13:21:39,674 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.1.norm2.b_2 +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_v.weight +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_v.bias +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_k.weight +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_k.bias +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_q.weight +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_q.bias +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_merge.weight +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.mhatt.linear_merge.bias +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.linear.weight +2025-03-24 13:21:39,675 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.ffn.mlp.linear.bias +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm1.a_2 +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm1.b_2 +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm2.a_2 +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.2.norm2.b_2 +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_v.weight +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_v.bias +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_k.weight +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_k.bias +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_q.weight +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_q.bias +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_merge.weight +2025-03-24 13:21:39,676 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.mhatt.linear_merge.bias +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.linear.weight +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.ffn.mlp.linear.bias +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm1.a_2 +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm1.b_2 +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm2.a_2 +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.3.norm2.b_2 +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_v.weight +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_v.bias +2025-03-24 13:21:39,677 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_k.weight +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_k.bias +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_q.weight +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_q.bias +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_merge.weight +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.mhatt.linear_merge.bias +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.linear.weight +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.ffn.mlp.linear.bias +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm1.a_2 +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm1.b_2 +2025-03-24 13:21:39,678 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm2.a_2 +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.4.norm2.b_2 +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_v.weight +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_v.bias +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_k.weight +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_k.bias +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_q.weight +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_q.bias +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_merge.weight +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.mhatt.linear_merge.bias +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,679 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.linear.weight +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.ffn.mlp.linear.bias +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm1.a_2 +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm1.b_2 +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm2.a_2 +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.enc_list.5.norm2.b_2 +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_v.weight +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_v.bias +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_k.weight +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_k.bias +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_q.weight +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_q.bias +2025-03-24 13:21:39,680 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_merge.weight +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt1.linear_merge.bias +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_v.weight +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_v.bias +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_k.weight +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_k.bias +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_q.weight +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_q.bias +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_merge.weight +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.mhatt2.linear_merge.bias +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,681 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.linear.weight +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.ffn.mlp.linear.bias +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm1.a_2 +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm1.b_2 +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm2.a_2 +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm2.b_2 +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm3.a_2 +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.0.norm3.b_2 +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_v.weight +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_v.bias +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_k.weight +2025-03-24 13:21:39,682 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_k.bias +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_q.weight +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_q.bias +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_merge.weight +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt1.linear_merge.bias +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_v.weight +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_v.bias +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_k.weight +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_k.bias +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_q.weight +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_q.bias +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_merge.weight +2025-03-24 13:21:39,683 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.mhatt2.linear_merge.bias +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.linear.weight +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.ffn.mlp.linear.bias +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm1.a_2 +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm1.b_2 +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm2.a_2 +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm2.b_2 +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm3.a_2 +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.1.norm3.b_2 +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_v.weight +2025-03-24 13:21:39,684 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_v.bias +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_k.weight +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_k.bias +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_q.weight +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_q.bias +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_merge.weight +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt1.linear_merge.bias +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_v.weight +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_v.bias +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_k.weight +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_k.bias +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_q.weight +2025-03-24 13:21:39,685 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_q.bias +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_merge.weight +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.mhatt2.linear_merge.bias +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.linear.weight +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.ffn.mlp.linear.bias +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm1.a_2 +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm1.b_2 +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm2.a_2 +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm2.b_2 +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm3.a_2 +2025-03-24 13:21:39,686 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.2.norm3.b_2 +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_v.weight +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_v.bias +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_k.weight +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_k.bias +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_q.weight +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_q.bias +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_merge.weight +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt1.linear_merge.bias +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_v.weight +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_v.bias +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_k.weight +2025-03-24 13:21:39,687 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_k.bias +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_q.weight +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_q.bias +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_merge.weight +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.mhatt2.linear_merge.bias +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.linear.weight +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.ffn.mlp.linear.bias +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm1.a_2 +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm1.b_2 +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm2.a_2 +2025-03-24 13:21:39,688 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm2.b_2 +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm3.a_2 +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.3.norm3.b_2 +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_v.weight +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_v.bias +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_k.weight +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_k.bias +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_q.weight +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_q.bias +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_merge.weight +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt1.linear_merge.bias +2025-03-24 13:21:39,689 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_v.weight +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_v.bias +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_k.weight +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_k.bias +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_q.weight +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_q.bias +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_merge.weight +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.mhatt2.linear_merge.bias +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.linear.weight +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.ffn.mlp.linear.bias +2025-03-24 13:21:39,690 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm1.a_2 +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm1.b_2 +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm2.a_2 +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm2.b_2 +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm3.a_2 +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.4.norm3.b_2 +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_v.weight +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_v.bias +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_k.weight +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_k.bias +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_q.weight +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_q.bias +2025-03-24 13:21:39,691 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_merge.weight +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt1.linear_merge.bias +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_v.weight +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_v.bias +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_k.weight +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_k.bias +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_q.weight +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_q.bias +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_merge.weight +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.mhatt2.linear_merge.bias +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,692 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.linear.weight +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.ffn.mlp.linear.bias +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm1.a_2 +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm1.b_2 +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm2.a_2 +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm2.b_2 +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm3.a_2 +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - mcan.dec_list.5.norm3.b_2 +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.fc.linear.weight +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.fc.linear.bias +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.linear.weight +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.mlp.linear.bias +2025-03-24 13:21:39,693 - INFO - allennlp.training.trainer_pieces - attflat_img.linear_merge.weight +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_img.linear_merge.bias +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.fc.linear.weight +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.fc.linear.bias +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.linear.weight +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.mlp.linear.bias +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.linear_merge.weight +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - attflat_lang.linear_merge.bias +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - decode_transform.weight +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - decode_transform.bias +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.word_embeddings.weight +2025-03-24 13:21:39,694 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.position_embeddings.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.token_type_embeddings.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.LayerNorm.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.embeddings.LayerNorm.bias +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.query.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.query.bias +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.key.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.key.bias +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.value.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.self.value.bias +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.dense.weight +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.dense.bias +2025-03-24 13:21:39,695 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.LayerNorm.weight +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.attention.output.LayerNorm.bias +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.intermediate.dense.weight +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.intermediate.dense.bias +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.dense.weight +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.dense.bias +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.LayerNorm.weight +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.0.output.LayerNorm.bias +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.query.weight +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.query.bias +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.key.weight +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.key.bias +2025-03-24 13:21:39,696 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.value.weight +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.self.value.bias +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.dense.weight +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.dense.bias +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.LayerNorm.weight +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.attention.output.LayerNorm.bias +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.intermediate.dense.weight +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.intermediate.dense.bias +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.dense.weight +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.dense.bias +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.LayerNorm.weight +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.1.output.LayerNorm.bias +2025-03-24 13:21:39,697 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.query.weight +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.query.bias +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.key.weight +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.key.bias +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.value.weight +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.self.value.bias +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.dense.weight +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.dense.bias +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.LayerNorm.weight +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.attention.output.LayerNorm.bias +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.intermediate.dense.weight +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.intermediate.dense.bias +2025-03-24 13:21:39,698 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.dense.weight +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.dense.bias +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.LayerNorm.weight +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.2.output.LayerNorm.bias +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.query.weight +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.query.bias +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.key.weight +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.key.bias +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.value.weight +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.self.value.bias +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.dense.weight +2025-03-24 13:21:39,699 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.dense.bias +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.LayerNorm.weight +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.attention.output.LayerNorm.bias +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.intermediate.dense.weight +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.intermediate.dense.bias +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.dense.weight +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.dense.bias +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.LayerNorm.weight +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.3.output.LayerNorm.bias +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.query.weight +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.query.bias +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.key.weight +2025-03-24 13:21:39,700 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.key.bias +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.value.weight +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.self.value.bias +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.dense.weight +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.dense.bias +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.LayerNorm.weight +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.attention.output.LayerNorm.bias +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.intermediate.dense.weight +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.intermediate.dense.bias +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.dense.weight +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.dense.bias +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.LayerNorm.weight +2025-03-24 13:21:39,701 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.4.output.LayerNorm.bias +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.query.weight +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.query.bias +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.key.weight +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.key.bias +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.value.weight +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.self.value.bias +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.dense.weight +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.dense.bias +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.LayerNorm.weight +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.attention.output.LayerNorm.bias +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.intermediate.dense.weight +2025-03-24 13:21:39,702 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.intermediate.dense.bias +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.dense.weight +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.dense.bias +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.LayerNorm.weight +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.5.output.LayerNorm.bias +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.query.weight +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.query.bias +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.key.weight +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.key.bias +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.value.weight +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.self.value.bias +2025-03-24 13:21:39,703 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.dense.weight +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.dense.bias +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.LayerNorm.weight +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.attention.output.LayerNorm.bias +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.intermediate.dense.weight +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.intermediate.dense.bias +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.dense.weight +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.dense.bias +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.LayerNorm.weight +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.6.output.LayerNorm.bias +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.query.weight +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.query.bias +2025-03-24 13:21:39,704 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.key.weight +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.key.bias +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.value.weight +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.self.value.bias +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.dense.weight +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.dense.bias +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.LayerNorm.weight +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.attention.output.LayerNorm.bias +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.intermediate.dense.weight +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.intermediate.dense.bias +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.dense.weight +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.dense.bias +2025-03-24 13:21:39,705 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.LayerNorm.weight +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.7.output.LayerNorm.bias +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.query.weight +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.query.bias +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.key.weight +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.key.bias +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.value.weight +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.self.value.bias +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.dense.weight +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.dense.bias +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.LayerNorm.weight +2025-03-24 13:21:39,706 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.attention.output.LayerNorm.bias +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.intermediate.dense.weight +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.intermediate.dense.bias +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.dense.weight +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.dense.bias +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.LayerNorm.weight +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.8.output.LayerNorm.bias +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.query.weight +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.query.bias +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.key.weight +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.key.bias +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.value.weight +2025-03-24 13:21:39,707 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.self.value.bias +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.dense.weight +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.dense.bias +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.LayerNorm.weight +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.attention.output.LayerNorm.bias +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.intermediate.dense.weight +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.intermediate.dense.bias +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.dense.weight +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.dense.bias +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.LayerNorm.weight +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.9.output.LayerNorm.bias +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.query.weight +2025-03-24 13:21:39,708 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.query.bias +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.key.weight +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.key.bias +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.value.weight +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.self.value.bias +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.dense.weight +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.dense.bias +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.LayerNorm.weight +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.attention.output.LayerNorm.bias +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.intermediate.dense.weight +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.intermediate.dense.bias +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.dense.weight +2025-03-24 13:21:39,709 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.dense.bias +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.LayerNorm.weight +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.10.output.LayerNorm.bias +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.query.weight +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.query.bias +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.key.weight +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.key.bias +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.value.weight +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.self.value.bias +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.dense.weight +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.dense.bias +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.LayerNorm.weight +2025-03-24 13:21:39,710 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.attention.output.LayerNorm.bias +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.intermediate.dense.weight +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.intermediate.dense.bias +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.dense.weight +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.dense.bias +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.LayerNorm.weight +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.encoder.layer.11.output.LayerNorm.bias +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.pooler.dense.weight +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.embedding.pooler.dense.bias +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.trans.weight +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.trans.bias +2025-03-24 13:21:39,711 - INFO - allennlp.training.trainer_pieces - _encoder.norm.weight +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.norm.bias +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm_embedding.weight +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.weight_ih_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.weight_hh_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.bias_ih_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.lstm.bias_hh_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_trans.weight +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_trans.bias +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_norm.weight +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.concat_norm.bias +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.weight_ih_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.weight_hh_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.bias_ih_l0 +2025-03-24 13:21:39,712 - INFO - allennlp.training.trainer_pieces - _encoder.merge_lstm.bias_hh_l0 +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_mlp.weight +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_mlp.bias +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_norm.weight +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_norm.bias +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.weight_ih_l0 +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.weight_hh_l0 +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.bias_ih_l0 +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.early_gru.bias_hh_l0 +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_v.weight +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_v.bias +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_k.weight +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_k.bias +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_q.weight +2025-03-24 13:21:39,713 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_q.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_merge.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt1.linear_merge.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_v.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_v.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_k.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_k.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_q.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_q.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_merge.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.mhatt2.linear_merge.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.linear.weight +2025-03-24 13:21:39,714 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.ffn.mlp.linear.bias +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm1.a_2 +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm1.b_2 +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm2.a_2 +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm2.b_2 +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm3.a_2 +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.0.norm3.b_2 +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_v.weight +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_v.bias +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_k.weight +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_k.bias +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_q.weight +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_q.bias +2025-03-24 13:21:39,715 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_merge.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt1.linear_merge.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_v.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_v.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_k.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_k.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_q.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_q.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_merge.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.mhatt2.linear_merge.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.linear.weight +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.ffn.mlp.linear.bias +2025-03-24 13:21:39,716 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm1.a_2 +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm1.b_2 +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm2.a_2 +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm2.b_2 +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm3.a_2 +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _encoder.merge_att.sga_list.1.norm3.b_2 +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _target_embedder.weight +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.weight_ih +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.weight_hh +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.bias_ih +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _decoder_cell.bias_hh +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _output_projection_layer.weight +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - _output_projection_layer.bias +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.cls_token +2025-03-24 13:21:39,717 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.patch_embeddings.projection.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.embeddings.patch_embeddings.projection.bias +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.query.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.query.bias +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.key.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.key.bias +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.value.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.attention.value.bias +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.output.dense.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.attention.output.dense.bias +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.intermediate.dense.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.intermediate.dense.bias +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.output.dense.weight +2025-03-24 13:21:39,718 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.output.dense.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_before.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_before.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_after.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.0.layernorm_after.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.query.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.query.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.key.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.key.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.value.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.attention.value.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.output.dense.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.attention.output.dense.bias +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.intermediate.dense.weight +2025-03-24 13:21:39,719 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.intermediate.dense.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.output.dense.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.output.dense.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_before.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_before.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_after.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.1.layernorm_after.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.query.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.query.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.key.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.key.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.value.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.attention.value.bias +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.output.dense.weight +2025-03-24 13:21:39,720 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.attention.output.dense.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.intermediate.dense.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.intermediate.dense.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.output.dense.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.output.dense.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_before.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_before.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_after.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.2.layernorm_after.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.query.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.query.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.key.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.key.bias +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.value.weight +2025-03-24 13:21:39,721 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.attention.value.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.output.dense.weight +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.attention.output.dense.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.intermediate.dense.weight +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.intermediate.dense.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.output.dense.weight +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.output.dense.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_before.weight +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_before.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_after.weight +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.3.layernorm_after.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.query.weight +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.query.bias +2025-03-24 13:21:39,722 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.key.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.key.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.value.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.attention.value.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.output.dense.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.attention.output.dense.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.intermediate.dense.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.intermediate.dense.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.output.dense.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.output.dense.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_before.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_before.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_after.weight +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.4.layernorm_after.bias +2025-03-24 13:21:39,723 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.query.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.query.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.key.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.key.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.value.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.attention.value.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.output.dense.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.attention.output.dense.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.intermediate.dense.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.intermediate.dense.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.output.dense.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.output.dense.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_before.weight +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_before.bias +2025-03-24 13:21:39,724 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_after.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.5.layernorm_after.bias +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.query.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.query.bias +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.key.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.key.bias +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.value.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.attention.value.bias +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.output.dense.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.attention.output.dense.bias +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.intermediate.dense.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.intermediate.dense.bias +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.output.dense.weight +2025-03-24 13:21:39,725 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.output.dense.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_before.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_before.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_after.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.6.layernorm_after.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.query.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.query.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.key.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.key.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.value.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.attention.value.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.output.dense.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.attention.output.dense.bias +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.intermediate.dense.weight +2025-03-24 13:21:39,726 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.intermediate.dense.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.output.dense.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.output.dense.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_before.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_before.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_after.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.7.layernorm_after.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.query.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.query.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.key.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.key.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.value.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.attention.value.bias +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.output.dense.weight +2025-03-24 13:21:39,727 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.attention.output.dense.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.intermediate.dense.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.intermediate.dense.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.output.dense.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.output.dense.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_before.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_before.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_after.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.8.layernorm_after.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.query.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.query.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.key.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.key.bias +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.value.weight +2025-03-24 13:21:39,728 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.attention.value.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.output.dense.weight +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.attention.output.dense.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.intermediate.dense.weight +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.intermediate.dense.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.output.dense.weight +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.output.dense.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_before.weight +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_before.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_after.weight +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.9.layernorm_after.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.query.weight +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.query.bias +2025-03-24 13:21:39,729 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.key.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.key.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.value.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.attention.value.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.output.dense.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.attention.output.dense.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.intermediate.dense.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.intermediate.dense.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.output.dense.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.output.dense.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_before.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_before.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_after.weight +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.10.layernorm_after.bias +2025-03-24 13:21:39,730 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.query.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.query.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.key.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.key.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.value.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.attention.value.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.output.dense.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.attention.output.dense.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.intermediate.dense.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.intermediate.dense.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.output.dense.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.output.dense.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_before.weight +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_before.bias +2025-03-24 13:21:39,731 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_after.weight +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.vit.encoder.layer.11.layernorm_after.bias +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.vit.layernorm.weight +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.vit.layernorm.bias +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.mask_token +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_embed.weight +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_embed.bias +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.query.weight +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.query.bias +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.key.weight +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.key.bias +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.value.weight +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.attention.value.bias +2025-03-24 13:21:39,732 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.output.dense.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.attention.output.dense.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.intermediate.dense.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.intermediate.dense.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.output.dense.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.output.dense.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_before.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_before.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_after.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.0.layernorm_after.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.query.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.query.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.key.weight +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.key.bias +2025-03-24 13:21:39,733 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.value.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.attention.value.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.output.dense.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.attention.output.dense.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.intermediate.dense.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.intermediate.dense.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.output.dense.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.output.dense.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_before.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_before.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_after.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.1.layernorm_after.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.query.weight +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.query.bias +2025-03-24 13:21:39,734 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.key.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.key.bias +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.value.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.attention.value.bias +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.output.dense.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.attention.output.dense.bias +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.intermediate.dense.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.intermediate.dense.bias +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.output.dense.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.output.dense.bias +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_before.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_before.bias +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_after.weight +2025-03-24 13:21:39,735 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.2.layernorm_after.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.query.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.query.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.key.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.key.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.value.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.attention.value.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.output.dense.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.attention.output.dense.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.intermediate.dense.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.intermediate.dense.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.output.dense.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.output.dense.bias +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_before.weight +2025-03-24 13:21:39,736 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_before.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_after.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.3.layernorm_after.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.query.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.query.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.key.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.key.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.value.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.attention.value.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.output.dense.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.attention.output.dense.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.intermediate.dense.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.intermediate.dense.bias +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.output.dense.weight +2025-03-24 13:21:39,737 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.output.dense.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_before.weight +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_before.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_after.weight +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.4.layernorm_after.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.query.weight +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.query.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.key.weight +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.key.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.value.weight +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.attention.value.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.output.dense.weight +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.attention.output.dense.bias +2025-03-24 13:21:39,738 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.intermediate.dense.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.intermediate.dense.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.output.dense.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.output.dense.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_before.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_before.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_after.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.5.layernorm_after.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.query.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.query.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.key.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.key.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.value.weight +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.attention.value.bias +2025-03-24 13:21:39,739 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.output.dense.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.attention.output.dense.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.intermediate.dense.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.intermediate.dense.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.output.dense.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.output.dense.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_before.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_before.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_after.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.6.layernorm_after.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.query.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.query.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.key.weight +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.key.bias +2025-03-24 13:21:39,740 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.value.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.attention.value.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.output.dense.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.attention.output.dense.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.intermediate.dense.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.intermediate.dense.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.output.dense.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.output.dense.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_before.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_before.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_after.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_layers.7.layernorm_after.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_norm.weight +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_norm.bias +2025-03-24 13:21:39,741 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_pred.weight +2025-03-24 13:21:39,742 - INFO - allennlp.training.trainer_pieces - vit_model.decoder.decoder_pred.bias +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.patience = None +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.validation_metric = +acc +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.shuffle = True +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.num_epochs = 100 +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.cuda_device = 0 +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.grad_norm = 10 +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.grad_clipping = None +2025-03-24 13:21:39,742 - INFO - allennlp.common.params - trainer.momentum_scheduler = None +2025-03-24 13:21:44,607 - INFO - allennlp.common.params - trainer.optimizer.type = adam +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently. +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.0.1.lr = 1e-05 +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently. +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.1.1.lr = 1e-05 +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently. +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.2.1.lr = 2e-05 +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently. +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: +2025-03-24 13:21:44,608 - INFO - allennlp.common.params - trainer.optimizer.parameter_groups.3.1.lr = 0.001 +2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Done constructing parameter groups. +2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Group 0: ['mcan.dec_list.2.norm2.a_2', 'mcan.enc_list.0.ffn.mlp.fc.linear.weight', 'mcan.enc_list.4.mhatt.linear_k.bias', 'mcan.enc_list.3.ffn.mlp.fc.linear.weight', 'mcan.dec_list.2.norm2.b_2', 'mcan.enc_list.2.norm2.a_2', '_encoder.merge_att.sga_list.0.mhatt2.linear_merge.weight', 'mcan.enc_list.5.mhatt.linear_k.weight', 'mcan.dec_list.1.ffn.mlp.fc.linear.bias', 'mcan.dec_list.5.ffn.mlp.linear.weight', 'mcan.dec_list.1.norm3.a_2', 'mcan.dec_list.0.mhatt2.linear_v.bias', 'mcan.enc_list.5.ffn.mlp.linear.bias', 'mcan.dec_list.4.mhatt2.linear_k.bias', '_encoder.merge_att.sga_list.1.mhatt1.linear_v.bias', '_encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_q.weight', 'mcan.enc_list.2.ffn.mlp.fc.linear.bias', 'mcan.dec_list.2.mhatt2.linear_merge.bias', 'mcan.enc_list.5.mhatt.linear_v.bias', 'mcan.dec_list.5.mhatt2.linear_k.bias', 'mcan.dec_list.4.norm3.b_2', 'mcan.dec_list.0.norm2.b_2', 'mcan.dec_list.4.ffn.mlp.linear.weight', '_encoder.merge_att.sga_list.0.mhatt1.linear_q.weight', 'mcan.dec_list.3.mhatt2.linear_merge.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_k.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_k.bias', '_encoder.merge_att.sga_list.0.norm3.b_2', 'mcan.dec_list.1.mhatt1.linear_v.weight', 'mcan.enc_list.0.mhatt.linear_q.weight', 'mcan.dec_list.5.mhatt2.linear_q.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_merge.bias', 'attflat_img.mlp.linear.bias', 'mcan.dec_list.1.mhatt2.linear_q.weight', 'mcan.dec_list.3.norm2.a_2', 'mcan.dec_list.3.ffn.mlp.fc.linear.bias', 'mcan.dec_list.4.mhatt1.linear_v.weight', 'mcan.enc_list.3.ffn.mlp.fc.linear.bias', 'mcan.dec_list.5.norm1.a_2', '_encoder.merge_att.sga_list.1.mhatt2.linear_q.bias', 'mcan.enc_list.4.mhatt.linear_q.weight', 'mcan.enc_list.4.mhatt.linear_merge.bias', 'mcan.enc_list.2.mhatt.linear_k.bias', '_encoder.merge_att.sga_list.1.norm2.b_2', '_encoder.merge_att.sga_list.0.norm2.a_2', '_encoder.merge_att.sga_list.1.ffn.mlp.linear.weight', 'mcan.enc_list.5.mhatt.linear_q.bias', 'mcan.enc_list.3.mhatt.linear_k.weight', '_encoder.merge_att.sga_list.0.norm2.b_2', 'mcan.dec_list.4.ffn.mlp.linear.bias', 'mcan.enc_list.2.mhatt.linear_q.weight', 'mcan.enc_list.4.ffn.mlp.linear.bias', 'mcan.dec_list.1.mhatt2.linear_merge.weight', 'mcan.dec_list.3.mhatt1.linear_merge.weight', '_encoder.merge_att.sga_list.0.mhatt1.linear_v.weight', 'mcan.dec_list.2.mhatt1.linear_v.weight', 'mcan.dec_list.5.mhatt2.linear_merge.bias', 'mcan.enc_list.4.mhatt.linear_q.bias', 'mcan.dec_list.2.mhatt2.linear_k.weight', 'mcan.enc_list.4.ffn.mlp.fc.linear.bias', 'mcan.dec_list.4.mhatt2.linear_v.bias', 'mcan.dec_list.0.mhatt1.linear_merge.bias', '_encoder.merge_att.sga_list.1.mhatt1.linear_k.weight', 'mcan.dec_list.0.mhatt2.linear_k.weight', 'mcan.enc_list.1.mhatt.linear_q.bias', 'attflat_lang.mlp.linear.bias', 'mcan.dec_list.2.mhatt2.linear_v.weight', 'mcan.enc_list.3.mhatt.linear_v.bias', '_encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.bias', 'attflat_lang.linear_merge.bias', 'mcan.dec_list.5.mhatt1.linear_k.weight', 'mcan.dec_list.0.norm3.a_2', 'mcan.enc_list.2.mhatt.linear_v.bias', 'mcan.dec_list.1.mhatt2.linear_k.bias', 'mcan.dec_list.2.ffn.mlp.linear.weight', 'mcan.enc_list.1.ffn.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_v.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_k.bias', 'mcan.enc_list.3.norm1.b_2', 'mcan.dec_list.2.mhatt2.linear_merge.weight', '_encoder.merge_att.sga_list.0.norm1.a_2', 'mcan.dec_list.4.mhatt1.linear_merge.weight', 'mcan.dec_list.4.mhatt1.linear_q.weight', 'mcan.dec_list.5.ffn.mlp.fc.linear.weight', 'mcan.enc_list.5.mhatt.linear_merge.weight', 'mcan.dec_list.3.ffn.mlp.linear.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_q.bias', 'mcan.dec_list.2.ffn.mlp.linear.bias', 'mcan.dec_list.2.mhatt1.linear_k.bias', 'mcan.enc_list.1.ffn.mlp.fc.linear.bias', 'mcan.dec_list.3.mhatt2.linear_q.bias', 'mcan.enc_list.3.mhatt.linear_merge.bias', 'mcan.dec_list.2.mhatt1.linear_k.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_v.weight', 'mcan.dec_list.1.mhatt1.linear_merge.weight', 'mcan.enc_list.3.mhatt.linear_q.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_merge.bias', 'mcan.dec_list.5.norm3.a_2', '_encoder.merge_att.sga_list.0.norm1.b_2', 'mcan.enc_list.4.ffn.mlp.fc.linear.weight', 'mcan.dec_list.5.norm2.b_2', 'mcan.enc_list.5.norm1.b_2', 'mcan.enc_list.4.norm1.a_2', 'mcan.dec_list.5.mhatt1.linear_merge.bias', 'mcan.enc_list.4.mhatt.linear_v.bias', 'mcan.dec_list.5.mhatt1.linear_v.weight', 'attflat_lang.mlp.fc.linear.bias', 'mcan.dec_list.4.mhatt2.linear_merge.bias', 'mcan.enc_list.0.norm2.b_2', 'mcan.enc_list.2.mhatt.linear_q.bias', 'mcan.dec_list.5.mhatt1.linear_k.bias', 'mcan.enc_list.0.norm1.a_2', 'mcan.enc_list.1.mhatt.linear_q.weight', 'mcan.enc_list.3.norm2.a_2', 'mcan.dec_list.2.norm3.b_2', 'mcan.dec_list.0.norm1.a_2', 'mcan.dec_list.0.mhatt1.linear_v.bias', 'mcan.dec_list.4.mhatt2.linear_v.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_k.weight', 'mcan.enc_list.0.mhatt.linear_v.bias', '_encoder.merge_att.sga_list.1.norm1.a_2', '_encoder.merge_att.sga_list.0.mhatt1.linear_merge.bias', 'mcan.enc_list.4.mhatt.linear_k.weight', 'mcan.dec_list.1.ffn.mlp.linear.bias', 'mcan.enc_list.5.mhatt.linear_v.weight', 'mcan.enc_list.3.mhatt.linear_k.bias', 'mcan.dec_list.0.ffn.mlp.linear.bias', 'mcan.dec_list.2.mhatt2.linear_q.bias', 'mcan.dec_list.0.mhatt1.linear_k.bias', 'mcan.dec_list.0.mhatt2.linear_v.weight', 'attflat_img.mlp.linear.weight', 'attflat_img.mlp.fc.linear.weight', 'mcan.enc_list.0.mhatt.linear_merge.bias', 'mcan.dec_list.2.mhatt2.linear_k.bias', 'mcan.enc_list.0.norm1.b_2', 'mcan.dec_list.1.mhatt1.linear_merge.bias', 'mcan.enc_list.4.ffn.mlp.linear.weight', 'mcan.enc_list.2.norm1.b_2', 'mcan.dec_list.1.mhatt1.linear_v.bias', 'mcan.enc_list.5.ffn.mlp.fc.linear.weight', 'mcan.dec_list.4.mhatt2.linear_q.weight', 'mcan.dec_list.2.mhatt1.linear_merge.weight', 'mcan.dec_list.3.mhatt2.linear_q.weight', 'mcan.dec_list.0.mhatt2.linear_merge.bias', 'mcan.dec_list.0.ffn.mlp.fc.linear.weight', 'mcan.dec_list.5.mhatt2.linear_v.bias', 'mcan.dec_list.4.norm2.b_2', 'mcan.dec_list.3.mhatt1.linear_v.bias', 'decode_transform.bias', '_encoder.merge_att.sga_list.0.mhatt2.linear_v.weight', 'mcan.dec_list.0.ffn.mlp.linear.weight', 'mcan.dec_list.0.ffn.mlp.fc.linear.bias', 'mcan.dec_list.5.mhatt2.linear_merge.weight', 'mcan.dec_list.1.mhatt1.linear_k.weight', 'mcan.dec_list.4.mhatt2.linear_k.weight', 'mcan.dec_list.1.norm3.b_2', 'mcan.dec_list.0.norm1.b_2', 'attflat_img.linear_merge.weight', 'mcan.dec_list.3.norm3.b_2', 'mcan.enc_list.3.mhatt.linear_v.weight', 'mcan.dec_list.3.mhatt2.linear_v.bias', 'mcan.dec_list.3.mhatt1.linear_q.bias', 'mcan.enc_list.3.mhatt.linear_q.bias', 'mcan.enc_list.5.ffn.mlp.fc.linear.bias', 'mcan.dec_list.3.ffn.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_q.weight', 'mcan.dec_list.1.ffn.mlp.fc.linear.weight', 'mcan.enc_list.5.mhatt.linear_merge.bias', 'mcan.dec_list.2.mhatt1.linear_q.bias', 'mcan.enc_list.1.norm1.a_2', 'attflat_lang.mlp.fc.linear.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_q.bias', 'mcan.dec_list.1.mhatt2.linear_q.bias', 'mcan.enc_list.1.mhatt.linear_k.bias', 'mcan.dec_list.2.mhatt1.linear_q.weight', 'mcan.dec_list.3.mhatt1.linear_k.bias', 'mcan.enc_list.1.mhatt.linear_merge.weight', 'mcan.enc_list.3.norm1.a_2', 'mcan.dec_list.1.norm2.b_2', 'mcan.dec_list.4.mhatt1.linear_v.bias', 'mcan.enc_list.0.mhatt.linear_k.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_merge.weight', 'mcan.enc_list.4.mhatt.linear_v.weight', '_encoder.merge_att.sga_list.1.norm3.b_2', 'mcan.dec_list.3.ffn.mlp.linear.weight', 'mcan.enc_list.2.norm2.b_2', 'mcan.enc_list.0.norm2.a_2', 'mcan.enc_list.0.mhatt.linear_q.bias', 'mcan.dec_list.2.mhatt2.linear_q.weight', 'mcan.dec_list.4.norm2.a_2', 'mcan.dec_list.0.mhatt1.linear_q.weight', '_encoder.merge_att.sga_list.0.ffn.mlp.fc.linear.weight', 'mcan.enc_list.1.norm2.a_2', 'channel_transform.bias', 'mcan.dec_list.2.norm1.b_2', 'mcan.enc_list.1.ffn.mlp.linear.bias', '_encoder.merge_att.sga_list.0.mhatt2.linear_q.weight', 'mcan.dec_list.1.mhatt2.linear_merge.bias', 'mcan.dec_list.1.norm2.a_2', '_encoder.merge_att.sga_list.0.mhatt1.linear_k.bias', '_encoder.merge_att.sga_list.0.mhatt1.linear_v.bias', 'mcan.dec_list.0.norm3.b_2', 'mcan.enc_list.5.mhatt.linear_k.bias', 'attflat_lang.linear_merge.weight', 'mcan.dec_list.5.mhatt1.linear_q.bias', 'mcan.dec_list.3.mhatt2.linear_k.bias', 'mcan.dec_list.0.mhatt1.linear_v.weight', 'mcan.dec_list.5.norm2.a_2', 'mcan.dec_list.4.mhatt1.linear_merge.bias', 'mcan.enc_list.4.norm2.a_2', '_encoder.merge_att.sga_list.1.norm3.a_2', 'mcan.dec_list.5.mhatt2.linear_q.bias', 'mcan.dec_list.4.ffn.mlp.fc.linear.weight', 'decode_transform.weight', 'mcan.dec_list.0.mhatt1.linear_merge.weight', 'mcan.enc_list.5.ffn.mlp.linear.weight', 'mcan.dec_list.2.ffn.mlp.fc.linear.bias', 'mcan.enc_list.1.ffn.mlp.linear.weight', 'channel_transform.weight', 'attflat_img.mlp.fc.linear.bias', 'mcan.enc_list.4.norm2.b_2', 'mcan.enc_list.2.mhatt.linear_merge.weight', 'mcan.dec_list.2.mhatt1.linear_merge.bias', '_encoder.merge_att.sga_list.1.ffn.mlp.fc.linear.bias', 'mcan.enc_list.2.ffn.mlp.linear.bias', 'mcan.dec_list.4.mhatt2.linear_q.bias', 'mcan.dec_list.3.mhatt1.linear_v.weight', '_encoder.merge_att.sga_list.1.mhatt1.linear_merge.weight', 'mcan.dec_list.0.mhatt2.linear_k.bias', 'mcan.dec_list.4.mhatt1.linear_k.weight', 'mcan.enc_list.1.norm1.b_2', 'mcan.dec_list.4.mhatt2.linear_merge.weight', 'mcan.dec_list.2.mhatt1.linear_v.bias', 'mcan.dec_list.1.ffn.mlp.linear.weight', '_encoder.merge_att.sga_list.0.mhatt2.linear_v.bias', 'mcan.dec_list.0.mhatt2.linear_q.weight', 'mcan.enc_list.3.norm2.b_2', 'mcan.enc_list.4.mhatt.linear_merge.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_k.bias', 'mcan.enc_list.2.ffn.mlp.linear.weight', 'mcan.enc_list.1.mhatt.linear_v.bias', 'mcan.enc_list.1.norm2.b_2', 'mcan.enc_list.3.mhatt.linear_merge.weight', 'mcan.dec_list.3.norm2.b_2', 'mcan.dec_list.5.mhatt2.linear_k.weight', 'mcan.dec_list.5.mhatt1.linear_q.weight', 'mcan.dec_list.4.norm3.a_2', 'mcan.dec_list.0.mhatt1.linear_q.bias', '_encoder.merge_att.sga_list.1.norm2.a_2', 'mcan.dec_list.5.ffn.mlp.linear.bias', 'mcan.dec_list.3.mhatt2.linear_v.weight', 'mcan.dec_list.5.ffn.mlp.fc.linear.bias', 'mcan.enc_list.0.mhatt.linear_merge.weight', 'mcan.enc_list.2.mhatt.linear_k.weight', 'mcan.dec_list.1.mhatt1.linear_q.weight', 'mcan.dec_list.0.mhatt2.linear_q.bias', 'mcan.dec_list.1.norm1.a_2', 'mcan.enc_list.5.norm2.a_2', 'mcan.dec_list.0.mhatt1.linear_k.weight', '_encoder.merge_att.sga_list.0.norm3.a_2', 'mcan.dec_list.1.mhatt2.linear_k.weight', 'mcan.dec_list.4.norm1.b_2', 'mcan.dec_list.3.mhatt1.linear_merge.bias', 'mcan.dec_list.5.norm1.b_2', 'mcan.dec_list.5.mhatt1.linear_merge.weight', 'mcan.enc_list.0.ffn.mlp.linear.bias', 'mcan.enc_list.1.mhatt.linear_k.weight', 'mcan.enc_list.1.mhatt.linear_merge.bias', 'mcan.dec_list.0.mhatt2.linear_merge.weight', '_encoder.merge_att.sga_list.1.mhatt2.linear_merge.bias', 'mcan.dec_list.1.mhatt1.linear_q.bias', 'mcan.dec_list.4.mhatt1.linear_q.bias', 'mcan.enc_list.4.norm1.b_2', 'mcan.dec_list.3.norm3.a_2', 'mcan.dec_list.4.mhatt1.linear_k.bias', 'mcan.dec_list.4.ffn.mlp.fc.linear.bias', 'mcan.dec_list.3.norm1.b_2', 'mcan.dec_list.2.ffn.mlp.fc.linear.weight', 'mcan.enc_list.5.norm2.b_2', '_encoder.merge_att.sga_list.1.mhatt1.linear_q.bias', 'mcan.enc_list.0.ffn.mlp.fc.linear.bias', 'mcan.enc_list.2.norm1.a_2', 'mcan.enc_list.3.ffn.mlp.linear.weight', 'mcan.dec_list.5.mhatt1.linear_v.bias', '_encoder.merge_att.sga_list.1.norm1.b_2', 'mcan.dec_list.5.norm3.b_2', 'mcan.enc_list.0.mhatt.linear_v.weight', '_encoder.merge_att.sga_list.0.ffn.mlp.linear.bias', '_encoder.merge_att.sga_list.0.ffn.mlp.linear.weight', 'mcan.dec_list.5.mhatt2.linear_v.weight', 'mcan.enc_list.5.norm1.a_2', 'mcan.dec_list.0.norm2.a_2', 'mcan.dec_list.2.norm1.a_2', 'mcan.dec_list.3.mhatt1.linear_q.weight', 'mcan.enc_list.5.mhatt.linear_q.weight', 'mcan.dec_list.3.mhatt2.linear_k.weight', 'mcan.enc_list.2.mhatt.linear_v.weight', 'mcan.dec_list.1.mhatt2.linear_v.weight', 'mcan.enc_list.0.ffn.mlp.linear.weight', 'mcan.dec_list.1.norm1.b_2', 'mcan.dec_list.3.norm1.a_2', 'mcan.enc_list.0.mhatt.linear_k.weight', 'mcan.dec_list.3.mhatt1.linear_k.weight', 'attflat_img.linear_merge.bias', '_encoder.merge_att.sga_list.1.mhatt2.linear_k.weight', 'mcan.dec_list.2.norm3.a_2', '_encoder.merge_att.sga_list.1.mhatt2.linear_v.bias', 'attflat_lang.mlp.linear.weight', 'mcan.enc_list.2.mhatt.linear_merge.bias', '_encoder.merge_att.sga_list.1.mhatt2.linear_merge.weight', 'mcan.dec_list.1.mhatt1.linear_k.bias', 'mcan.dec_list.2.mhatt2.linear_v.bias', '_encoder.merge_att.sga_list.1.ffn.mlp.linear.bias', 'mcan.enc_list.3.ffn.mlp.linear.bias', 'mcan.dec_list.4.norm1.a_2', 'mcan.enc_list.1.mhatt.linear_v.weight', 'mcan.dec_list.3.mhatt2.linear_merge.weight', 'mcan.enc_list.2.ffn.mlp.fc.linear.weight', 'mcan.dec_list.1.mhatt2.linear_v.bias'], {'lr': 1e-05} +2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Group 1: [], {'lr': 1e-05} +2025-03-24 13:21:44,621 - INFO - allennlp.training.optimizers - Group 2: ['_encoder.embedding.encoder.layer.5.attention.self.value.bias', '_encoder.embedding.encoder.layer.4.attention.self.query.weight', '_encoder.embedding.encoder.layer.4.attention.output.LayerNorm.weight', '_encoder.embedding.pooler.dense.bias', '_encoder.embedding.encoder.layer.10.attention.self.value.weight', '_encoder.embedding.encoder.layer.3.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.7.attention.self.query.bias', '_encoder.embedding.encoder.layer.8.attention.self.query.bias', '_encoder.embedding.encoder.layer.6.attention.output.dense.weight', '_encoder.embedding.encoder.layer.4.attention.self.key.weight', '_encoder.embedding.encoder.layer.5.intermediate.dense.weight', '_encoder.embedding.encoder.layer.10.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.3.intermediate.dense.bias', '_encoder.embedding.encoder.layer.1.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.output.dense.weight', '_encoder.embedding.encoder.layer.6.intermediate.dense.bias', '_encoder.embedding.encoder.layer.6.attention.self.query.weight', '_encoder.embedding.encoder.layer.6.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.7.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.10.attention.output.dense.bias', '_encoder.embedding.encoder.layer.10.attention.self.key.weight', '_encoder.embedding.encoder.layer.10.intermediate.dense.bias', '_encoder.embedding.encoder.layer.0.intermediate.dense.bias', '_encoder.embedding.encoder.layer.10.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.attention.self.key.weight', '_encoder.embedding.encoder.layer.7.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.9.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.11.attention.self.key.weight', '_encoder.embedding.encoder.layer.1.attention.output.dense.weight', '_encoder.embedding.encoder.layer.5.intermediate.dense.bias', '_encoder.embedding.encoder.layer.3.attention.self.key.weight', '_encoder.embedding.encoder.layer.6.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.7.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.5.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.1.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.output.dense.bias', '_encoder.embedding.encoder.layer.1.attention.self.key.bias', '_encoder.embedding.encoder.layer.9.attention.self.query.bias', '_encoder.embedding.embeddings.position_embeddings.weight', '_encoder.embedding.encoder.layer.5.attention.self.value.weight', '_encoder.embedding.encoder.layer.2.intermediate.dense.bias', '_encoder.embedding.encoder.layer.5.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.10.attention.self.query.weight', '_encoder.embedding.encoder.layer.0.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.5.attention.self.key.weight', '_encoder.embedding.encoder.layer.1.attention.self.key.weight', '_encoder.embedding.encoder.layer.4.attention.output.dense.bias', '_encoder.embedding.encoder.layer.4.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.4.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.self.query.weight', '_encoder.embedding.encoder.layer.10.attention.self.query.bias', '_encoder.embedding.encoder.layer.10.attention.self.key.bias', '_encoder.embedding.encoder.layer.11.attention.self.key.bias', '_encoder.embedding.encoder.layer.4.intermediate.dense.weight', '_encoder.embedding.encoder.layer.6.attention.self.key.bias', '_encoder.embedding.encoder.layer.6.output.dense.weight', '_encoder.embedding.encoder.layer.1.output.dense.weight', '_encoder.embedding.encoder.layer.2.attention.self.key.weight', '_encoder.embedding.encoder.layer.2.attention.self.query.weight', '_encoder.embedding.encoder.layer.3.attention.self.key.bias', '_encoder.embedding.encoder.layer.0.attention.self.query.bias', '_encoder.embedding.encoder.layer.9.attention.output.dense.weight', '_encoder.embedding.encoder.layer.2.attention.self.value.weight', '_encoder.embedding.encoder.layer.7.intermediate.dense.bias', '_encoder.embedding.encoder.layer.1.attention.self.value.weight', '_encoder.embedding.encoder.layer.10.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.6.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.11.attention.self.query.bias', '_encoder.embedding.encoder.layer.1.attention.self.value.bias', '_encoder.embedding.encoder.layer.3.attention.output.dense.weight', '_encoder.embedding.encoder.layer.1.attention.self.query.weight', '_encoder.embedding.encoder.layer.2.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.6.attention.self.value.bias', '_encoder.embedding.encoder.layer.11.output.dense.weight', '_encoder.embedding.encoder.layer.4.output.dense.bias', '_encoder.embedding.embeddings.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.attention.self.query.bias', '_encoder.embedding.encoder.layer.11.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.self.query.weight', '_encoder.embedding.encoder.layer.2.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.9.attention.self.value.weight', '_encoder.embedding.encoder.layer.11.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.10.attention.self.value.bias', '_encoder.embedding.encoder.layer.11.attention.output.dense.bias', '_encoder.embedding.encoder.layer.5.attention.output.dense.weight', '_encoder.embedding.encoder.layer.2.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.5.attention.self.key.bias', '_encoder.embedding.encoder.layer.8.attention.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.output.dense.weight', '_encoder.embedding.encoder.layer.5.output.dense.weight', '_encoder.embedding.embeddings.word_embeddings.weight', '_encoder.embedding.encoder.layer.3.output.dense.bias', '_encoder.embedding.encoder.layer.7.attention.output.dense.weight', '_encoder.embedding.encoder.layer.7.attention.self.value.weight', '_encoder.embedding.encoder.layer.8.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.1.intermediate.dense.bias', '_encoder.embedding.encoder.layer.11.attention.self.value.weight', '_encoder.embedding.encoder.layer.4.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.0.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.attention.self.key.bias', '_encoder.embedding.encoder.layer.3.intermediate.dense.weight', '_encoder.embedding.encoder.layer.6.attention.output.dense.bias', '_encoder.embedding.encoder.layer.9.attention.self.key.weight', '_encoder.embedding.encoder.layer.11.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.9.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.intermediate.dense.weight', '_encoder.embedding.encoder.layer.7.attention.output.dense.bias', '_encoder.embedding.encoder.layer.6.output.dense.bias', '_encoder.embedding.encoder.layer.7.attention.self.query.weight', '_encoder.embedding.encoder.layer.8.intermediate.dense.bias', '_encoder.embedding.encoder.layer.4.attention.self.value.weight', '_encoder.embedding.encoder.layer.1.attention.output.dense.bias', '_encoder.embedding.encoder.layer.3.output.dense.weight', '_encoder.embedding.encoder.layer.4.attention.output.dense.weight', '_encoder.embedding.encoder.layer.6.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.output.dense.weight', '_encoder.embedding.encoder.layer.8.intermediate.dense.weight', '_encoder.embedding.encoder.layer.7.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.attention.self.value.bias', '_encoder.embedding.encoder.layer.3.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.2.attention.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.self.value.weight', '_encoder.embedding.encoder.layer.9.output.dense.bias', '_encoder.embedding.encoder.layer.0.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.2.output.dense.weight', '_encoder.embedding.encoder.layer.1.intermediate.dense.weight', '_encoder.embedding.encoder.layer.9.intermediate.dense.bias', '_encoder.embedding.encoder.layer.8.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.output.dense.weight', '_encoder.embedding.encoder.layer.2.attention.self.value.bias', '_encoder.embedding.encoder.layer.10.attention.output.dense.weight', '_encoder.embedding.encoder.layer.5.attention.self.query.weight', '_encoder.embedding.encoder.layer.10.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.9.output.dense.weight', '_encoder.embedding.encoder.layer.11.intermediate.dense.bias', '_encoder.embedding.embeddings.token_type_embeddings.weight', '_encoder.embedding.encoder.layer.5.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.2.attention.output.dense.weight', '_encoder.embedding.encoder.layer.7.attention.self.key.weight', '_encoder.embedding.encoder.layer.7.attention.self.key.bias', '_encoder.embedding.encoder.layer.8.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.9.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.6.attention.self.key.weight', '_encoder.embedding.encoder.layer.1.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.10.output.dense.weight', '_encoder.embedding.encoder.layer.4.attention.self.query.bias', '_encoder.embedding.encoder.layer.0.attention.self.key.weight', '_encoder.embedding.encoder.layer.9.attention.output.dense.bias', '_encoder.embedding.encoder.layer.1.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.5.output.dense.bias', '_encoder.embedding.encoder.layer.6.attention.self.value.weight', '_encoder.embedding.encoder.layer.3.attention.self.query.bias', '_encoder.embedding.encoder.layer.3.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.4.intermediate.dense.bias', '_encoder.embedding.encoder.layer.9.intermediate.dense.weight', '_encoder.embedding.pooler.dense.weight', '_encoder.embedding.encoder.layer.2.intermediate.dense.weight', '_encoder.embedding.encoder.layer.3.attention.self.query.weight', '_encoder.embedding.encoder.layer.6.attention.self.query.bias', '_encoder.embedding.encoder.layer.8.attention.self.value.weight', '_encoder.embedding.encoder.layer.3.attention.self.value.weight', '_encoder.embedding.encoder.layer.7.output.dense.weight', '_encoder.embedding.encoder.layer.0.output.dense.bias', '_encoder.embedding.encoder.layer.8.attention.self.query.weight', '_encoder.embedding.encoder.layer.9.attention.self.value.bias', '_encoder.embedding.encoder.layer.3.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.2.output.dense.bias', '_encoder.embedding.encoder.layer.11.intermediate.dense.weight', '_encoder.embedding.encoder.layer.8.attention.self.key.bias', '_encoder.embedding.embeddings.LayerNorm.weight', '_encoder.embedding.encoder.layer.4.attention.self.key.bias', '_encoder.embedding.encoder.layer.10.intermediate.dense.weight', '_encoder.embedding.encoder.layer.1.output.dense.bias', '_encoder.embedding.encoder.layer.7.attention.self.value.bias', '_encoder.embedding.encoder.layer.5.attention.self.query.bias', '_encoder.embedding.encoder.layer.5.attention.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.7.intermediate.dense.weight', '_encoder.embedding.encoder.layer.1.attention.self.query.bias', '_encoder.embedding.encoder.layer.7.output.dense.bias', '_encoder.embedding.encoder.layer.6.intermediate.dense.weight', '_encoder.embedding.encoder.layer.11.attention.self.value.bias', '_encoder.embedding.encoder.layer.9.attention.self.query.weight', '_encoder.embedding.encoder.layer.9.attention.self.key.bias', '_encoder.embedding.encoder.layer.3.attention.output.dense.bias', '_encoder.embedding.encoder.layer.9.attention.output.LayerNorm.weight', '_encoder.embedding.encoder.layer.0.attention.self.key.bias', '_encoder.embedding.encoder.layer.0.attention.output.dense.bias', '_encoder.embedding.encoder.layer.3.attention.self.value.bias', '_encoder.embedding.encoder.layer.8.attention.output.dense.weight', '_encoder.embedding.encoder.layer.11.output.LayerNorm.bias', '_encoder.embedding.encoder.layer.8.attention.self.value.bias', '_encoder.embedding.encoder.layer.4.attention.self.value.bias', '_encoder.embedding.encoder.layer.5.attention.output.dense.bias', '_encoder.embedding.encoder.layer.10.output.dense.bias', '_encoder.embedding.encoder.layer.4.output.dense.weight'], {'lr': 2e-05} +2025-03-24 13:21:44,625 - INFO - allennlp.training.optimizers - Group 3: ['_encoder.lstm_embedding.weight', '_encoder.concat_trans.weight', '_encoder.concat_trans.bias', '_encoder.trans.weight', '_encoder.norm.weight', '_encoder.concat_norm.weight', '_encoder.norm.bias', '_encoder.concat_norm.bias', '_encoder.trans.bias'], {'lr': 0.001} +2025-03-24 13:21:44,625 - INFO - allennlp.training.optimizers - Group 4: ['vit_model.decoder.decoder_layers.5.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.3.intermediate.dense.weight', 'vit_model.vit.encoder.layer.5.attention.output.dense.bias', 'vit_model.vit.encoder.layer.10.output.dense.bias', 'vit_model.decoder.decoder_layers.3.layernorm_before.weight', 'vit_model.vit.encoder.layer.10.layernorm_after.bias', 'vit_model.decoder.decoder_layers.1.layernorm_before.bias', 'vit_model.vit.encoder.layer.7.intermediate.dense.weight', 'vit_model.vit.encoder.layer.2.intermediate.dense.weight', '_encoder.lstm.bias_ih_l0', 'vit_model.decoder.decoder_layers.0.intermediate.dense.bias', 'vit_model.vit.encoder.layer.3.attention.output.dense.weight', '_output_projection_layer.weight', '_encoder.lstm.bias_hh_l0', 'vit_model.decoder.decoder_layers.4.attention.attention.value.bias', 'vit_model.vit.encoder.layer.4.layernorm_before.weight', 'vit_model.decoder.decoder_layers.4.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.query.weight', 'vit_model.vit.encoder.layer.8.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.query.weight', 'vit_model.vit.encoder.layer.2.attention.attention.value.bias', 'vit_model.vit.encoder.layer.2.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.0.layernorm_before.bias', 'vit_model.vit.encoder.layer.6.attention.attention.value.weight', 'vit_model.vit.encoder.layer.5.attention.attention.value.bias', 'vit_model.vit.encoder.layer.4.intermediate.dense.bias', 'vit_model.vit.encoder.layer.8.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.6.output.dense.bias', 'vit_model.vit.encoder.layer.11.attention.attention.query.bias', 'vit_model.vit.encoder.layer.7.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.3.output.dense.bias', 'vit_model.vit.encoder.layer.3.intermediate.dense.weight', 'vit_model.vit.encoder.layer.7.layernorm_after.bias', '_encoder.merge_lstm.weight_ih_l0', 'vit_model.decoder.decoder_layers.7.attention.attention.query.bias', 'vit_model.vit.encoder.layer.9.output.dense.bias', 'vit_model.decoder.decoder_layers.6.attention.output.dense.weight', '_encoder.early_gru.weight_ih_l0', 'vit_model.vit.encoder.layer.3.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.output.dense.weight', '_decoder_cell.weight_ih', 'vit_model.decoder.decoder_layers.4.output.dense.bias', 'vit_model.decoder.decoder_layers.1.layernorm_after.bias', 'vit_model.vit.encoder.layer.3.output.dense.bias', 'vit_model.vit.embeddings.patch_embeddings.projection.bias', 'vit_model.vit.encoder.layer.6.output.dense.bias', 'vit_model.vit.encoder.layer.10.layernorm_after.weight', 'vit_model.vit.encoder.layer.10.layernorm_before.bias', 'vit_model.vit.encoder.layer.11.attention.attention.key.bias', 'vit_model.vit.encoder.layer.4.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.layernorm_before.bias', 'vit_model.decoder.decoder_layers.7.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.5.output.dense.weight', 'vit_model.vit.encoder.layer.2.attention.attention.query.weight', '_target_embedder.weight', 'vit_model.vit.encoder.layer.0.attention.output.dense.bias', 'vit_model.vit.encoder.layer.1.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.6.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.value.bias', 'vit_model.vit.encoder.layer.5.output.dense.weight', 'vit_model.decoder.decoder_layers.6.output.dense.weight', '_output_projection_layer.bias', 'vit_model.decoder.decoder_layers.2.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.7.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.5.layernorm_after.weight', 'vit_model.vit.encoder.layer.3.layernorm_before.weight', 'vit_model.decoder.decoder_layers.0.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.4.layernorm_after.weight', '_encoder.merge_lstm.weight_hh_l0', 'vit_model.decoder.decoder_layers.6.layernorm_after.bias', 'vit_model.vit.encoder.layer.2.output.dense.bias', 'vit_model.decoder.decoder_layers.1.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.2.attention.attention.query.weight', 'vit_model.vit.encoder.layer.8.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.0.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.7.intermediate.dense.weight', 'vit_model.vit.encoder.layer.4.attention.output.dense.weight', 'vit_model.vit.encoder.layer.5.attention.attention.query.bias', 'vit_model.vit.encoder.layer.1.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.1.output.dense.bias', 'vit_model.decoder.decoder_layers.1.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.1.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.0.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.0.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.0.output.dense.weight', 'vit_model.decoder.decoder_layers.7.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.2.attention.attention.value.weight', 'vit_model.vit.encoder.layer.11.layernorm_before.bias', 'vit_model.vit.encoder.layer.6.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.5.intermediate.dense.weight', 'vit_model.vit.encoder.layer.4.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.7.intermediate.dense.bias', 'vit_model.vit.encoder.layer.0.attention.attention.query.bias', 'vit_model.vit.encoder.layer.2.intermediate.dense.bias', 'vit_model.vit.encoder.layer.11.attention.output.dense.bias', 'vit_model.vit.encoder.layer.11.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.output.dense.weight', 'vit_model.vit.embeddings.cls_token', 'vit_model.decoder.decoder_layers.7.output.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.key.weight', 'vit_model.vit.encoder.layer.6.layernorm_before.weight', 'vit_model.vit.encoder.layer.7.attention.output.dense.weight', 'vit_model.vit.encoder.layer.4.intermediate.dense.weight', 'vit_model.vit.encoder.layer.1.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.value.bias', 'vit_model.vit.encoder.layer.7.output.dense.weight', 'vit_model.vit.encoder.layer.0.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.6.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.5.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.4.layernorm_before.weight', 'vit_model.vit.encoder.layer.7.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.attention.attention.query.bias', 'vit_model.vit.encoder.layer.11.attention.attention.value.bias', 'vit_model.vit.encoder.layer.5.attention.attention.key.bias', 'vit_model.vit.encoder.layer.8.output.dense.bias', 'vit_model.vit.encoder.layer.7.layernorm_before.bias', 'vit_model.decoder.decoder_layers.6.layernorm_before.weight', 'vit_model.vit.encoder.layer.11.layernorm_after.weight', 'vit_model.decoder.decoder_layers.0.layernorm_after.weight', 'vit_model.decoder.decoder_layers.2.attention.attention.query.bias', 'vit_model.vit.encoder.layer.0.intermediate.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.value.weight', 'vit_model.vit.encoder.layer.1.layernorm_before.bias', 'vit_model.decoder.decoder_layers.7.layernorm_after.bias', 'vit_model.decoder.decoder_layers.7.layernorm_after.weight', 'vit_model.vit.encoder.layer.8.layernorm_after.bias', 'vit_model.vit.encoder.layer.11.intermediate.dense.weight', 'vit_model.vit.encoder.layer.0.attention.attention.value.weight', 'vit_model.vit.encoder.layer.3.attention.attention.value.weight', '_encoder.merge_lstm.bias_hh_l0', '_decoder_cell.weight_hh', 'vit_model.decoder.decoder_layers.1.output.dense.weight', 'vit_model.vit.encoder.layer.3.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.4.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.5.layernorm_before.bias', 'vit_model.decoder.decoder_layers.3.intermediate.dense.bias', 'vit_model.vit.encoder.layer.5.layernorm_before.bias', 'vit_model.decoder.decoder_layers.2.attention.output.dense.weight', 'vit_model.vit.encoder.layer.5.output.dense.bias', 'vit_model.vit.encoder.layer.2.output.dense.weight', 'vit_model.vit.encoder.layer.9.layernorm_after.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.layernorm_before.weight', 'vit_model.vit.encoder.layer.4.attention.attention.value.weight', 'vit_model.vit.encoder.layer.4.layernorm_after.weight', 'vit_model.vit.encoder.layer.4.output.dense.bias', 'vit_model.vit.encoder.layer.4.layernorm_before.bias', 'vit_model.decoder.decoder_layers.0.layernorm_before.weight', 'vit_model.decoder.decoder_layers.1.layernorm_after.weight', 'vit_model.decoder.decoder_layers.3.layernorm_after.bias', 'vit_model.vit.encoder.layer.1.attention.attention.key.weight', 'vit_model.vit.encoder.layer.8.layernorm_before.weight', 'vit_model.vit.encoder.layer.6.layernorm_after.weight', 'vit_model.vit.encoder.layer.8.attention.attention.key.weight', 'vit_model.vit.encoder.layer.3.layernorm_before.bias', 'vit_model.vit.layernorm.bias', 'vit_model.vit.encoder.layer.4.output.dense.weight', 'vit_model.vit.encoder.layer.2.layernorm_after.weight', 'vit_model.decoder.decoder_layers.0.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.1.attention.output.dense.weight', 'vit_model.vit.encoder.layer.10.attention.attention.value.weight', 'vit_model.vit.encoder.layer.4.attention.output.dense.bias', 'vit_model.vit.encoder.layer.4.layernorm_after.bias', 'vit_model.vit.encoder.layer.10.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.6.attention.output.dense.bias', 'vit_model.vit.encoder.layer.6.intermediate.dense.weight', 'vit_model.vit.encoder.layer.11.output.dense.bias', 'vit_model.vit.encoder.layer.8.layernorm_after.weight', 'vit_model.vit.encoder.layer.1.output.dense.bias', 'vit_model.decoder.decoder_layers.7.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.5.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.5.attention.attention.key.bias', 'vit_model.vit.encoder.layer.9.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.5.attention.attention.query.bias', 'vit_model.vit.encoder.layer.5.intermediate.dense.weight', 'vit_model.vit.encoder.layer.3.layernorm_after.weight', 'vit_model.vit.encoder.layer.2.layernorm_before.bias', 'vit_model.vit.encoder.layer.9.intermediate.dense.bias', 'vit_model.vit.encoder.layer.4.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.4.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.key.weight', 'vit_model.vit.encoder.layer.10.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.3.layernorm_after.weight', 'vit_model.decoder.decoder_layers.2.intermediate.dense.bias', 'vit_model.vit.encoder.layer.2.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.value.weight', 'vit_model.vit.encoder.layer.5.intermediate.dense.bias', 'vit_model.vit.encoder.layer.7.layernorm_before.weight', 'vit_model.vit.encoder.layer.1.intermediate.dense.weight', 'vit_model.vit.encoder.layer.0.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.0.attention.attention.query.weight', 'vit_model.vit.encoder.layer.0.attention.attention.query.weight', 'vit_model.vit.encoder.layer.6.attention.attention.value.bias', 'vit_model.vit.encoder.layer.7.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.7.attention.output.dense.bias', '_encoder.early_gru.bias_ih_l0', 'vit_model.decoder.decoder_layers.6.attention.attention.query.bias', 'vit_model.vit.encoder.layer.11.intermediate.dense.bias', 'vit_model.decoder.decoder_layers.1.intermediate.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.key.weight', 'vit_model.vit.encoder.layer.5.attention.attention.key.weight', 'vit_model.vit.encoder.layer.6.output.dense.weight', 'vit_model.vit.encoder.layer.3.output.dense.weight', '_encoder.lstm.weight_ih_l0', 'vit_model.vit.encoder.layer.9.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.4.attention.attention.key.bias', 'vit_model.vit.encoder.layer.6.layernorm_before.bias', 'vit_model.vit.encoder.layer.0.attention.attention.key.weight', 'vit_model.vit.encoder.layer.0.attention.attention.key.bias', '_decoder_cell.bias_hh', 'vit_model.vit.encoder.layer.7.intermediate.dense.bias', 'vit_model.vit.encoder.layer.10.intermediate.dense.bias', 'vit_model.vit.encoder.layer.3.intermediate.dense.bias', 'vit_model.vit.encoder.layer.11.layernorm_after.bias', 'vit_model.decoder.decoder_layers.2.output.dense.weight', 'vit_model.vit.encoder.layer.10.output.dense.weight', 'vit_model.decoder.decoder_layers.3.layernorm_before.bias', 'vit_model.vit.encoder.layer.1.attention.attention.value.bias', 'vit_model.vit.encoder.layer.1.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.value.weight', '_decoder_cell.bias_ih', 'vit_model.decoder.decoder_layers.5.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.layernorm_after.bias', '_encoder.merge_norm.bias', '_encoder.early_gru.weight_hh_l0', 'vit_model.vit.encoder.layer.1.attention.attention.query.bias', 'vit_model.vit.encoder.layer.9.output.dense.weight', 'vit_model.vit.encoder.layer.11.layernorm_before.weight', 'vit_model.decoder.decoder_layers.7.layernorm_before.weight', 'vit_model.decoder.decoder_pred.bias', 'vit_model.vit.encoder.layer.4.attention.attention.query.weight', 'vit_model.decoder.decoder_layers.4.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.5.attention.output.dense.weight', 'vit_model.vit.encoder.layer.2.attention.attention.key.weight', 'vit_model.vit.encoder.layer.11.output.dense.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.2.output.dense.bias', 'vit_model.vit.encoder.layer.8.output.dense.weight', 'vit_model.decoder.decoder_norm.bias', 'vit_model.decoder.decoder_layers.6.layernorm_before.bias', 'vit_model.vit.encoder.layer.11.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.7.output.dense.weight', 'vit_model.vit.encoder.layer.10.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.2.layernorm_before.weight', 'vit_model.vit.encoder.layer.2.layernorm_before.weight', 'vit_model.decoder.decoder_embed.bias', 'vit_model.vit.encoder.layer.7.attention.attention.query.bias', 'vit_model.vit.encoder.layer.2.attention.output.dense.weight', 'vit_model.vit.encoder.layer.10.layernorm_before.weight', 'vit_model.decoder.decoder_layers.6.attention.attention.key.weight', '_encoder.merge_mlp.bias', 'vit_model.decoder.decoder_layers.1.layernorm_before.weight', 'vit_model.vit.encoder.layer.3.attention.attention.query.weight', 'vit_model.vit.encoder.layer.10.attention.attention.query.weight', 'vit_model.vit.encoder.layer.10.attention.attention.query.bias', 'vit_model.decoder.decoder_pred.weight', 'vit_model.decoder.decoder_layers.1.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.0.attention.attention.value.bias', 'vit_model.vit.encoder.layer.3.attention.attention.key.weight', 'vit_model.vit.encoder.layer.11.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.1.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.0.attention.attention.key.weight', 'vit_model.vit.encoder.layer.10.attention.attention.key.bias', 'vit_model.vit.embeddings.patch_embeddings.projection.weight', 'vit_model.decoder.decoder_layers.7.layernorm_before.bias', '_encoder.merge_mlp.weight', 'vit_model.vit.encoder.layer.8.layernorm_before.bias', 'vit_model.vit.encoder.layer.8.attention.attention.query.weight', 'vit_model.vit.encoder.layer.5.attention.output.dense.weight', 'vit_model.vit.encoder.layer.0.output.dense.weight', 'vit_model.decoder.decoder_layers.5.intermediate.dense.bias', 'vit_model.vit.encoder.layer.9.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.4.layernorm_after.bias', '_encoder.early_gru.bias_hh_l0', 'vit_model.vit.encoder.layer.5.attention.attention.value.weight', 'vit_model.decoder.decoder_layers.0.output.dense.bias', '_encoder.merge_lstm.bias_ih_l0', 'vit_model.vit.encoder.layer.7.layernorm_after.weight', 'vit_model.decoder.decoder_layers.2.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.attention.attention.query.bias', 'vit_model.decoder.decoder_layers.5.attention.attention.value.bias', 'vit_model.decoder.decoder_layers.5.layernorm_after.bias', 'vit_model.decoder.decoder_layers.3.attention.output.dense.weight', 'vit_model.decoder.decoder_embed.weight', 'vit_model.decoder.decoder_layers.0.layernorm_after.bias', 'vit_model.vit.encoder.layer.0.layernorm_before.bias', 'vit_model.decoder.decoder_layers.2.layernorm_before.bias', 'vit_model.vit.encoder.layer.3.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.4.intermediate.dense.bias', 'vit_model.vit.encoder.layer.0.output.dense.bias', 'vit_model.decoder.decoder_layers.2.layernorm_after.weight', 'vit_model.vit.encoder.layer.1.attention.attention.value.weight', 'vit_model.vit.encoder.layer.7.output.dense.bias', 'vit_model.vit.encoder.layer.6.attention.output.dense.bias', 'vit_model.vit.encoder.layer.9.attention.output.dense.weight', 'vit_model.vit.encoder.layer.7.attention.attention.key.weight', '_encoder.lstm.weight_hh_l0', 'vit_model.vit.encoder.layer.5.attention.attention.query.weight', 'vit_model.vit.encoder.layer.0.attention.output.dense.weight', 'vit_model.vit.encoder.layer.0.layernorm_after.bias', 'vit_model.decoder.decoder_layers.2.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.3.attention.output.dense.bias', 'vit_model.decoder.decoder_layers.2.attention.attention.value.bias', 'vit_model.vit.encoder.layer.2.attention.attention.value.weight', 'vit_model.vit.encoder.layer.6.intermediate.dense.bias', 'vit_model.vit.encoder.layer.2.attention.attention.query.bias', 'vit_model.vit.encoder.layer.3.attention.attention.query.bias', 'vit_model.vit.encoder.layer.8.attention.attention.value.bias', '_encoder.merge_norm.weight', 'vit_model.vit.encoder.layer.9.intermediate.dense.weight', 'vit_model.vit.encoder.layer.8.attention.output.dense.bias', 'vit_model.vit.encoder.layer.10.attention.attention.key.weight', 'vit_model.decoder.decoder_layers.1.attention.attention.query.weight', 'vit_model.vit.encoder.layer.6.attention.output.dense.weight', 'vit_model.decoder.decoder_layers.0.attention.output.dense.weight', 'vit_model.vit.encoder.layer.6.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.1.attention.attention.value.bias', 'vit_model.vit.encoder.layer.9.layernorm_before.bias', 'vit_model.vit.encoder.layer.11.attention.output.dense.weight', 'vit_model.vit.encoder.layer.10.attention.attention.value.bias', 'vit_model.vit.encoder.layer.7.attention.output.dense.bias', 'vit_model.vit.encoder.layer.9.layernorm_after.bias', 'vit_model.decoder.decoder_layers.5.layernorm_before.weight', 'vit_model.vit.encoder.layer.5.layernorm_before.weight', 'vit_model.decoder.decoder_layers.1.intermediate.dense.weight', 'vit_model.decoder.decoder_layers.2.attention.attention.key.bias', 'vit_model.decoder.mask_token', 'vit_model.decoder.decoder_layers.6.layernorm_after.weight', 'vit_model.vit.encoder.layer.5.layernorm_after.weight', 'vit_model.decoder.decoder_layers.4.output.dense.weight', 'vit_model.vit.layernorm.weight', 'vit_model.decoder.decoder_layers.2.attention.attention.key.weight', 'vit_model.vit.encoder.layer.5.layernorm_after.bias', 'vit_model.vit.encoder.layer.0.layernorm_after.weight', 'vit_model.vit.encoder.layer.2.layernorm_after.bias', 'vit_model.vit.encoder.layer.0.layernorm_before.weight', 'vit_model.vit.encoder.layer.9.layernorm_before.weight', 'vit_model.decoder.decoder_layers.5.output.dense.bias', 'vit_model.vit.encoder.layer.1.attention.attention.query.weight', 'vit_model.vit.encoder.layer.1.layernorm_after.weight', 'vit_model.decoder.decoder_layers.7.attention.attention.value.weight', 'vit_model.vit.encoder.layer.4.attention.attention.query.bias', 'vit_model.decoder.decoder_norm.weight', 'vit_model.vit.encoder.layer.9.attention.attention.query.bias', 'vit_model.vit.encoder.layer.3.attention.attention.key.bias', 'vit_model.vit.encoder.layer.7.attention.attention.key.bias', 'vit_model.decoder.decoder_layers.7.attention.attention.value.bias'], {} +2025-03-24 13:21:44,631 - WARNING - allennlp.training.optimizers - When constructing parameter groups, resnet not match any parameter name +2025-03-24 13:21:44,631 - WARNING - allennlp.training.optimizers - When constructing parameter groups, source_embedder not match any parameter name +2025-03-24 13:21:44,631 - WARNING - allennlp.training.optimizers - When constructing parameter groups, encoder.concat_trans_ not match any parameter name +2025-03-24 13:21:44,632 - INFO - allennlp.training.optimizers - Number of trainable parameters: 292199006 +2025-03-24 13:21:44,632 - INFO - allennlp.common.params - trainer.optimizer.infer_type_and_cast = True +2025-03-24 13:21:44,632 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently. +2025-03-24 13:21:44,632 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: +2025-03-24 13:21:44,632 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.001 +2025-03-24 13:21:44,633 - INFO - allennlp.common.registrable - instantiating registered subclass adam of +2025-03-24 13:21:44,635 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = reduce_on_plateau +2025-03-24 13:21:44,635 - INFO - allennlp.common.registrable - instantiating registered subclass reduce_on_plateau of +2025-03-24 13:21:44,635 - INFO - allennlp.common.params - Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently. +2025-03-24 13:21:44,635 - INFO - allennlp.common.params - CURRENTLY DEFINED PARAMETERS: +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.factor = 0.6 +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.mode = max +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.patience = 5 +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.num_serialized_models_to_keep = 20 +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.keep_serialized_model_every_num_seconds = None +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.model_save_interval = None +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.summary_interval = 100 +2025-03-24 13:21:44,636 - INFO - allennlp.common.params - trainer.histogram_interval = None +2025-03-24 13:21:44,637 - INFO - allennlp.common.params - trainer.should_log_parameter_statistics = True +2025-03-24 13:21:44,637 - INFO - allennlp.common.params - trainer.should_log_learning_rate = False +2025-03-24 13:21:44,637 - INFO - allennlp.common.params - trainer.log_batch_size_period = None +2025-03-24 13:21:44,637 - WARNING - allennlp.training.trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled +2025-03-24 13:21:44,784 - INFO - allennlp.training.trainer - Beginning training. +2025-03-24 13:21:44,784 - INFO - allennlp.training.trainer - Epoch 0/99 +2025-03-24 13:21:44,784 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:21:45,113 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:21:45,114 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:21:45,114 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 1974 +2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:21:45,115 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:21:45,127 - INFO - allennlp.training.trainer - Training +2025-03-24 13:23:05,615 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:23:13,876 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:23:13,876 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:23:13,878 - INFO - allennlp.training.tensorboard_writer - loss | 1.687 | 1.203 +2025-03-24 13:23:13,879 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:23:13,879 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 1974.000 | N/A +2025-03-24 13:23:13,880 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:23:13,880 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.233 +2025-03-24 13:23:13,880 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:23:13,881 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:23:13,881 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.197 +2025-03-24 13:23:13,882 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.367 +2025-03-24 13:23:13,882 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:23:13,883 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:23:14,111 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.326816 +2025-03-24 13:23:14,112 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:27:23 +2025-03-24 13:23:14,112 - INFO - allennlp.training.trainer - Epoch 1/99 +2025-03-24 13:23:14,112 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:23:14,418 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:23:14,419 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:23:14,419 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:23:14,419 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:23:14,420 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17628 +2025-03-24 13:23:14,420 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:23:14,420 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:23:14,432 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_divide', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2']], [['g_divide', 'N_1', 'N_2']], [['g_divide', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1']], [['g_divide', 'N_1', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']]] +2025-03-24 13:24:31,319 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:24:39,564 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:24:39,565 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:24:39,566 - INFO - allennlp.training.tensorboard_writer - loss | 1.122 | 1.045 +2025-03-24 13:24:39,567 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:24:39,568 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17628.000 | N/A +2025-03-24 13:24:39,568 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:24:39,568 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.353 +2025-03-24 13:24:39,569 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:24:39,569 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:24:39,570 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.167 +2025-03-24 13:24:39,570 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.425 +2025-03-24 13:24:39,571 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:24:39,571 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Epoch duration: 0:01:25.512708 +2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:22:47 +2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Epoch 2/99 +2025-03-24 13:24:39,625 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:24:39,942 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:24:39,953 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1']]] +2025-03-24 13:25:59,334 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:26:07,944 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:26:07,944 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:26:07,946 - INFO - allennlp.training.tensorboard_writer - loss | 1.003 | 0.999 +2025-03-24 13:26:07,946 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:26:07,946 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.281 +2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:26:07,947 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:26:07,948 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.174 +2025-03-24 13:26:07,948 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.418 +2025-03-24 13:26:07,949 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:26:07,949 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:26:08,006 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.381453 +2025-03-24 13:26:08,006 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:21:50 +2025-03-24 13:26:08,007 - INFO - allennlp.training.trainer - Epoch 3/99 +2025-03-24 13:26:08,007 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:26:08,383 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:26:08,384 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:26:08,397 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_0']], [['g_divide', 'N_0', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_divide', 'N_1', 'N_2']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'N_2', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'N_1', 'N_0']]] +selected_programs [[['g_equal', 'N_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_0']]] +2025-03-24 13:27:26,778 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:27:35,969 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:27:35,969 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:27:35,970 - INFO - allennlp.training.tensorboard_writer - loss | 0.942 | 0.928 +2025-03-24 13:27:35,971 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:27:35,971 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:27:35,971 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:27:35,972 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.312 +2025-03-24 13:27:35,972 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:27:35,973 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:27:35,973 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.173 +2025-03-24 13:27:35,974 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.417 +2025-03-24 13:27:35,974 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:27:35,974 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.991514 +2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:20:29 +2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Epoch 4/99 +2025-03-24 13:27:35,998 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:27:36,307 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:27:36,308 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:27:36,308 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:27:36,308 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:27:36,309 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:27:36,309 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:27:36,309 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:27:36,321 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'N_2', 'N_3', 'g_add', 'V_0', 'N_3']], [['g_minus', 'N_2', 'N_3', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']]] +selected_programs [[['g_double', 'N_0']], [['g_half', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_add', 'V_0', 'N_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['gougu_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 13:28:55,277 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:29:04,926 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:29:04,928 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:29:04,929 - INFO - allennlp.training.tensorboard_writer - loss | 0.895 | 0.908 +2025-03-24 13:29:04,930 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:29:04,930 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:29:04,930 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:29:04,931 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.324 +2025-03-24 13:29:04,931 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:29:04,931 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:29:04,932 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.156 +2025-03-24 13:29:04,932 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.432 +2025-03-24 13:29:04,932 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:29:04,933 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:29:04,976 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.977988 +2025-03-24 13:29:04,976 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:19:23 +2025-03-24 13:29:04,977 - INFO - allennlp.training.trainer - Epoch 5/99 +2025-03-24 13:29:04,977 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:29:05,303 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:29:05,304 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:29:05,305 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:29:05,305 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:29:05,317 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_equal', 'N_0']]] +2025-03-24 13:30:21,718 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:30:29,999 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:30:30,001 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:30:30,002 - INFO - allennlp.training.tensorboard_writer - loss | 0.845 | 0.866 +2025-03-24 13:30:30,003 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:30:30,004 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:30:30,004 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:30:30,004 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.352 +2025-03-24 13:30:30,005 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:30:30,005 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:30:30,006 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.132 +2025-03-24 13:30:30,006 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.443 +2025-03-24 13:30:30,007 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:30:30,007 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:30:30,058 - INFO - allennlp.training.trainer - Epoch duration: 0:01:25.081708 +2025-03-24 13:30:30,058 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:17:09 +2025-03-24 13:30:30,059 - INFO - allennlp.training.trainer - Epoch 6/99 +2025-03-24 13:30:30,059 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:30:30,411 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:30:30,412 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:30:30,413 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:30:30,413 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:30:30,425 - INFO - allennlp.training.trainer - Training +2025-03-24 13:31:44,806 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:31:53,191 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:31:53,191 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:31:53,191 - INFO - allennlp.training.tensorboard_writer - loss | 0.805 | 0.839 +2025-03-24 13:31:53,192 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:31:53,193 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:31:53,193 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:31:53,194 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.348 +2025-03-24 13:31:53,194 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:31:53,194 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:31:53,195 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.130 +2025-03-24 13:31:53,195 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.444 +2025-03-24 13:31:53,195 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:31:53,196 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.171465 +2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:14:43 +2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Epoch 7/99 +2025-03-24 13:31:53,230 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:31:53,647 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:31:53,648 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:31:53,648 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:31:53,649 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:31:53,662 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_add', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_3']], [['gougu_add', 'N_1', 'N_2', 'g_add', 'V_0', 'N_0']], [['g_sin', 'N_2', 'g_divide', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_sin', 'N_2', 'g_mul', 'N_0', 'N_1', 'g_minus', 'V_1', 'V_0']], [['g_equal', 'N_2']], [['g_add', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1', 'g_add', 'V_2', 'N_0']], [['g_half', 'N_2']], [['g_minus', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1']], [['cal_circle_area', 'N_0']], [['cal_circle_area', 'N_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_half', 'N_0', 'g_mul', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]] +2025-03-24 13:33:08,173 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:33:16,530 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:33:16,531 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:33:16,532 - INFO - allennlp.training.tensorboard_writer - loss | 0.771 | 0.832 +2025-03-24 13:33:16,533 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:33:16,533 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:33:16,534 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:33:16,534 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.378 +2025-03-24 13:33:16,534 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:33:16,535 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:33:16,535 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.135 +2025-03-24 13:33:16,536 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.458 +2025-03-24 13:33:16,536 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:33:16,537 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.363477 +2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:12:35 +2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Epoch 8/99 +2025-03-24 13:33:16,594 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:33:16,935 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:33:16,936 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:33:16,949 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'C_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_equal', 'N_0']], [['g_bili', 'N_1', 'N_2', 'N_3', 'gougu_add', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1', 'g_add', 'V_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_1']]] +2025-03-24 13:34:30,826 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:34:39,104 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:34:39,104 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:34:39,105 - INFO - allennlp.training.tensorboard_writer - loss | 0.737 | 0.797 +2025-03-24 13:34:39,106 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:34:39,106 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:34:39,107 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:34:39,107 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.381 +2025-03-24 13:34:39,108 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:34:39,108 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:34:39,108 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.111 +2025-03-24 13:34:39,109 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.483 +2025-03-24 13:34:39,109 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:34:39,110 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:34:39,139 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.545359 +2025-03-24 13:34:39,140 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:10:29 +2025-03-24 13:34:39,140 - INFO - allennlp.training.trainer - Epoch 9/99 +2025-03-24 13:34:39,140 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:34:39,451 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:34:39,452 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:34:39,453 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:34:39,453 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:34:39,465 - INFO - allennlp.training.trainer - Training +2025-03-24 13:35:54,133 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:36:03,253 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:36:03,254 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:36:03,255 - INFO - allennlp.training.tensorboard_writer - loss | 0.718 | 0.801 +2025-03-24 13:36:03,256 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:36:03,256 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:36:03,257 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:36:03,257 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.353 +2025-03-24 13:36:03,257 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:36:03,258 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:36:03,258 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 13:36:03,258 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.464 +2025-03-24 13:36:03,259 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:36:03,260 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Epoch duration: 0:01:24.151439 +2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:08:46 +2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Epoch 10/99 +2025-03-24 13:36:03,291 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:36:03,619 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:36:03,620 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:36:03,621 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:36:03,634 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'C_2']], [['g_minus', 'N_1', 'N_0']], [['g_sin', 'N_3', 'g_divide', 'N_4', 'N_4']], [['g_add', 'N_2', 'N_3']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['gougu_minus', 'N_2', 'N_1', 'g_bili', 'V_0', 'N_2', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1', 'N_2']], [['g_minus', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_1', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['cal_circle_area', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['cal_circle_area', 'N_1', 'g_divide', 'N_0', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_half', 'N_0', 'g_mul', 'V_0', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0']], [['g_half', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_mul', 'N_1', 'N_2', 'g_half', 'V_0']]] +2025-03-24 13:37:23,297 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:37:33,096 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:37:33,099 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:37:33,100 - INFO - allennlp.training.tensorboard_writer - loss | 0.680 | 0.790 +2025-03-24 13:37:33,100 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:37:33,100 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.431 +2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:37:33,101 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:37:33,102 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.121 +2025-03-24 13:37:33,102 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.524 +2025-03-24 13:37:33,102 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:37:33,104 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:37:33,146 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.854896 +2025-03-24 13:37:33,146 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:07:53 +2025-03-24 13:37:33,146 - INFO - allennlp.training.trainer - Epoch 11/99 +2025-03-24 13:37:33,147 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:37:33,518 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:37:33,530 - INFO - allennlp.training.trainer - Training +2025-03-24 13:38:52,808 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:39:01,396 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:39:01,409 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:39:01,411 - INFO - allennlp.training.tensorboard_writer - loss | 0.656 | 0.764 +2025-03-24 13:39:01,412 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:39:01,413 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:39:01,414 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:39:01,414 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.408 +2025-03-24 13:39:01,415 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:39:01,416 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:39:01,416 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 13:39:01,417 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.498 +2025-03-24 13:39:01,417 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:39:01,418 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.314099 +2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:06:42 +2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Epoch 12/99 +2025-03-24 13:39:01,461 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:39:01,787 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:39:01,788 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:39:01,789 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:39:01,803 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_equal', 'N_1']], [['g_sin', 'N_2', 'g_mul', 'N_3', 'V_0']], [['g_sin', 'N_2', 'g_mul', 'N_0', 'V_0']], [['g_equal', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_3']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_mul', 'N_2', 'V_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_0', 'N_2', 'g_add', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_tan', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_double', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]] +selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['g_divide', 'N_1', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_mul', 'N_1', 'N_2', 'g_half', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_double', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_0']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]] +2025-03-24 13:40:20,583 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:40:29,378 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:40:29,380 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:40:29,381 - INFO - allennlp.training.tensorboard_writer - loss | 0.631 | 0.767 +2025-03-24 13:40:29,382 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:40:29,383 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:40:29,383 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:40:29,384 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.431 +2025-03-24 13:40:29,384 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:40:29,384 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:40:29,385 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.113 +2025-03-24 13:40:29,385 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.511 +2025-03-24 13:40:29,386 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:40:29,386 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.975695 +2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:05:26 +2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Epoch 13/99 +2025-03-24 13:40:29,437 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:40:29,763 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:40:29,764 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:40:29,765 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:40:29,779 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'C_2']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['gougu_add', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1']], [['gougu_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_2', 'gougu_minus', 'V_1', 'N_2']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0']]] +selected_programs [[['g_mul', 'N_1', 'N_3', 'g_mul', 'V_0', 'N_3']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']]] +2025-03-24 13:41:48,644 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:41:57,084 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:41:57,085 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:41:57,086 - INFO - allennlp.training.tensorboard_writer - loss | 0.591 | 0.752 +2025-03-24 13:41:57,086 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.504 +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.096 +2025-03-24 13:41:57,087 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.560 +2025-03-24 13:41:57,088 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:41:57,089 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:41:57,145 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.708350 +2025-03-24 13:41:57,145 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:04:07 +2025-03-24 13:41:57,146 - INFO - allennlp.training.trainer - Epoch 14/99 +2025-03-24 13:41:57,146 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:41:57,599 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:41:57,600 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:41:57,601 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:41:57,613 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_1', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']]] +2025-03-24 13:43:15,814 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:43:24,259 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:43:24,260 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:43:24,260 - INFO - allennlp.training.tensorboard_writer - loss | 0.566 | 0.735 +2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.543 +2025-03-24 13:43:24,261 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.096 +2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.578 +2025-03-24 13:43:24,262 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:43:24,263 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.165195 +2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:02:43 +2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Epoch 15/99 +2025-03-24 13:43:24,311 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:43:24,634 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:43:24,635 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:43:24,636 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:43:24,636 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:43:24,648 - INFO - allennlp.training.trainer - Training +2025-03-24 13:44:42,871 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:44:51,532 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:44:51,534 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:44:51,535 - INFO - allennlp.training.tensorboard_writer - loss | 0.533 | 0.725 +2025-03-24 13:44:51,535 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:44:51,535 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:44:51,536 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:44:51,536 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.507 +2025-03-24 13:44:51,537 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.102 +2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.554 +2025-03-24 13:44:51,538 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:44:51,539 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:44:51,565 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.254279 +2025-03-24 13:44:51,565 - INFO - allennlp.training.trainer - Estimated training time remaining: 2:01:20 +2025-03-24 13:44:51,566 - INFO - allennlp.training.trainer - Epoch 16/99 +2025-03-24 13:44:51,566 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:44:51,919 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:44:51,920 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:44:51,921 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:44:51,934 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['gougu_minus', 'N_0', 'N_1']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0']]] +2025-03-24 13:46:10,443 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:46:18,882 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:46:18,883 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:46:18,884 - INFO - allennlp.training.tensorboard_writer - loss | 0.509 | 0.737 +2025-03-24 13:46:18,884 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:46:18,884 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:46:18,886 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:46:18,886 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.511 +2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.109 +2025-03-24 13:46:18,887 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.554 +2025-03-24 13:46:18,888 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:46:18,889 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.437937 +2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:59:57 +2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Epoch 17/99 +2025-03-24 13:46:19,004 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:46:19,355 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:46:19,356 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:46:19,357 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:46:19,370 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_2', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'gougu_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'gougu_minus', 'N_0', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']]] +selected_programs [[['g_equal', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_add', 'C_6', 'N_3', 'g_add', 'V_0', 'N_4']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_2']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0']]] +selected_programs [[['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_equal', 'C_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_2', 'g_double', 'N_1', 'g_minus', 'V_1', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']]] +2025-03-24 13:47:36,937 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:47:45,532 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:47:45,538 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:47:45,539 - INFO - allennlp.training.tensorboard_writer - loss | 0.485 | 0.764 +2025-03-24 13:47:45,539 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:47:45,541 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:47:45,541 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:47:45,541 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.560 +2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.092 +2025-03-24 13:47:45,542 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.571 +2025-03-24 13:47:45,543 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:47:45,543 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:47:45,594 - INFO - allennlp.training.trainer - Epoch duration: 0:01:26.590571 +2025-03-24 13:47:45,595 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:58:30 +2025-03-24 13:47:45,595 - INFO - allennlp.training.trainer - Epoch 18/99 +2025-03-24 13:47:45,595 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:47:46,026 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:47:46,027 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:47:46,028 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:47:46,042 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_2', 'g_add', 'V_0', 'N_3']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']]] +selected_programs [[['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_sin', 'N_0', 'g_divide', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'C_2', 'g_minus', 'V_2', 'C_2']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_2']], [['g_tan', 'N_0', 'g_mul', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']]] +selected_programs [[['g_equal', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_equal', 'N_3']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +2025-03-24 13:49:05,894 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:49:15,788 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:49:15,817 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:49:15,818 - INFO - allennlp.training.tensorboard_writer - loss | 0.469 | 0.773 +2025-03-24 13:49:15,819 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:49:15,819 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:49:15,820 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:49:15,820 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.550 +2025-03-24 13:49:15,821 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:49:15,821 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:49:15,821 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.122 +2025-03-24 13:49:15,822 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.565 +2025-03-24 13:49:15,823 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:49:15,823 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:49:15,845 - INFO - allennlp.training.trainer - Epoch duration: 0:01:30.250664 +2025-03-24 13:49:15,845 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:57:18 +2025-03-24 13:49:15,846 - INFO - allennlp.training.trainer - Epoch 19/99 +2025-03-24 13:49:15,846 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:49:16,150 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:49:16,151 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:49:16,151 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:49:16,152 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:49:16,165 - INFO - allennlp.training.trainer - Training +2025-03-24 13:50:31,543 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:50:39,965 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:50:39,976 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:50:39,978 - INFO - allennlp.training.tensorboard_writer - loss | 0.445 | 0.776 +2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.537 +2025-03-24 13:50:39,980 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:50:39,981 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:50:39,981 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.094 +2025-03-24 13:50:39,982 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.568 +2025-03-24 13:50:39,983 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:50:39,983 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Epoch duration: 0:01:24.182009 +2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:55:40 +2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Epoch 20/99 +2025-03-24 13:50:40,028 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:50:40,339 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:50:40,340 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:50:40,340 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:50:40,340 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:50:40,341 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:50:40,341 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:50:40,341 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:50:40,354 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'C_2']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0']]] +2025-03-24 13:51:59,973 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:52:09,726 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:52:09,735 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:52:09,736 - INFO - allennlp.training.tensorboard_writer - loss | 0.441 | 0.764 +2025-03-24 13:52:09,736 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:52:09,737 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:52:09,738 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:52:09,738 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.559 +2025-03-24 13:52:09,739 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:52:09,739 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:52:09,739 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.104 +2025-03-24 13:52:09,740 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.582 +2025-03-24 13:52:09,740 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:52:09,741 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.804281 +2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:54:25 +2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Epoch 21/99 +2025-03-24 13:52:09,832 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:52:10,264 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:52:10,265 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:52:10,279 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_2', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'C_2']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1', 'g_double', 'V_2']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 13:53:24,936 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:53:34,903 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:53:34,904 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:53:34,905 - INFO - allennlp.training.tensorboard_writer - loss | 0.412 | 0.775 +2025-03-24 13:53:34,906 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:53:34,906 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:53:34,907 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:53:34,907 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.602 +2025-03-24 13:53:34,908 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:53:34,908 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:53:34,908 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.102 +2025-03-24 13:53:34,909 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.588 +2025-03-24 13:53:34,909 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:53:34,910 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Epoch duration: 0:01:25.081265 +2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:52:52 +2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Epoch 22/99 +2025-03-24 13:53:34,914 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:53:35,247 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:53:35,248 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:53:35,248 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:53:35,248 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:53:35,249 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:53:35,249 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:53:35,249 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:53:35,262 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_mul', 'N_2', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_mul', 'V_1', 'N_3']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']]] +2025-03-24 13:54:53,092 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:55:01,776 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:55:01,787 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:55:01,787 - INFO - allennlp.training.tensorboard_writer - loss | 0.389 | 0.763 +2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.613 +2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:55:01,789 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:55:01,790 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.097 +2025-03-24 13:55:01,790 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.599 +2025-03-24 13:55:01,791 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:55:01,791 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Epoch duration: 0:01:26.908723 +2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:51:25 +2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Epoch 23/99 +2025-03-24 13:55:01,823 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:55:02,158 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:55:02,159 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:55:02,160 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:55:02,160 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:55:02,172 - INFO - allennlp.training.trainer - Training +2025-03-24 13:56:17,007 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:56:25,616 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:56:25,618 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:56:25,619 - INFO - allennlp.training.tensorboard_writer - loss | 0.367 | 0.823 +2025-03-24 13:56:25,620 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:56:25,620 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:56:25,621 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:56:25,621 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.608 +2025-03-24 13:56:25,621 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:56:25,622 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:56:25,622 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 13:56:25,622 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.599 +2025-03-24 13:56:25,623 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:56:25,623 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.825453 +2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:49:49 +2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Epoch 24/99 +2025-03-24 13:56:25,649 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:56:26,014 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:56:26,022 - INFO - allennlp.training.trainer - Training +2025-03-24 13:57:39,815 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:57:48,334 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:57:48,370 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:57:48,371 - INFO - allennlp.training.tensorboard_writer - loss | 0.359 | 0.803 +2025-03-24 13:57:48,372 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:57:48,372 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:57:48,372 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:57:48,373 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.604 +2025-03-24 13:57:48,373 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:57:48,373 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:57:48,374 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.110 +2025-03-24 13:57:48,374 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.599 +2025-03-24 13:57:48,375 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:57:48,375 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.786983 +2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:48:10 +2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Epoch 25/99 +2025-03-24 13:57:48,436 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:57:48,819 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:57:48,820 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:57:48,821 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:57:48,821 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:57:48,834 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'V_0', 'V_1', 'g_double', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'V_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_2', 'g_add', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'N_0', 'V_0', 'gougu_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1', 'g_minus', 'N_0', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']]] +2025-03-24 13:59:02,415 - INFO - allennlp.training.trainer - Validating +2025-03-24 13:59:10,932 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 13:59:10,932 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 13:59:10,933 - INFO - allennlp.training.tensorboard_writer - loss | 0.341 | 0.807 +2025-03-24 13:59:10,934 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 13:59:10,935 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 13:59:10,935 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 13:59:10,935 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.599 +2025-03-24 13:59:10,936 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 13:59:10,936 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 13:59:10,936 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.119 +2025-03-24 13:59:10,937 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.601 +2025-03-24 13:59:10,938 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 13:59:10,938 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.550905 +2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:46:33 +2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Epoch 26/99 +2025-03-24 13:59:10,987 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 13:59:11,331 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 13:59:11,332 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 13:59:11,332 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 13:59:11,333 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 13:59:11,346 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_tan', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_2']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'N_1', 'N_0', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 14:00:24,864 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:00:33,332 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:00:33,333 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:00:33,333 - INFO - allennlp.training.tensorboard_writer - loss | 0.319 | 0.828 +2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:00:33,335 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.618 +2025-03-24 14:00:33,336 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:00:33,336 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:00:33,337 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.111 +2025-03-24 14:00:33,337 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.597 +2025-03-24 14:00:33,338 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:00:33,338 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:00:33,352 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.364923 +2025-03-24 14:00:33,352 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:44:55 +2025-03-24 14:00:33,353 - INFO - allennlp.training.trainer - Epoch 27/99 +2025-03-24 14:00:33,353 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:00:33,671 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:00:33,672 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:00:33,673 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:00:33,673 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:00:33,686 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'N_4', 'g_mul', 'V_1', 'N_2']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0']]] +2025-03-24 14:01:47,939 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:01:56,577 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:01:56,577 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:01:56,578 - INFO - allennlp.training.tensorboard_writer - loss | 0.304 | 0.851 +2025-03-24 14:01:56,579 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:01:56,579 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.625 +2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:01:56,580 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:01:56,581 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.125 +2025-03-24 14:01:56,581 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.603 +2025-03-24 14:01:56,582 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:01:56,582 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.268260 +2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:43:21 +2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Epoch 28/99 +2025-03-24 14:01:56,621 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:01:57,019 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:01:57,020 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:01:57,020 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:01:57,020 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:01:57,021 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:01:57,021 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:01:57,021 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:01:57,034 - INFO - allennlp.training.trainer - Training +2025-03-24 14:03:11,775 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:03:20,384 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:03:20,403 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:03:20,406 - INFO - allennlp.training.tensorboard_writer - loss | 0.291 | 0.860 +2025-03-24 14:03:20,406 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:03:20,407 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:03:20,407 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:03:20,407 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.648 +2025-03-24 14:03:20,408 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:03:20,408 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:03:20,409 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.109 +2025-03-24 14:03:20,409 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.602 +2025-03-24 14:03:20,410 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:03:20,410 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.808762 +2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:41:50 +2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Epoch 29/99 +2025-03-24 14:03:20,430 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:03:20,779 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:03:20,780 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:03:20,780 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:03:20,781 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:03:20,794 - INFO - allennlp.training.trainer - Training +2025-03-24 14:04:35,554 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:04:44,097 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:04:44,098 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:04:44,098 - INFO - allennlp.training.tensorboard_writer - loss | 0.269 | 0.852 +2025-03-24 14:04:44,099 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:04:44,099 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:04:44,100 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:04:44,100 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.636 +2025-03-24 14:04:44,100 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:04:44,101 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:04:44,101 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.114 +2025-03-24 14:04:44,102 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.613 +2025-03-24 14:04:44,102 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:04:44,103 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:04:53,573 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 14:04:57,831 - INFO - allennlp.training.trainer - Epoch duration: 0:01:37.401059 +2025-03-24 14:04:57,832 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:40:50 +2025-03-24 14:04:57,832 - INFO - allennlp.training.trainer - Epoch 30/99 +2025-03-24 14:04:57,832 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:04:58,205 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:04:58,206 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:04:58,207 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:04:58,207 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:04:58,221 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_3', 'N_2', 'N_4']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_1']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_add', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']]] +selected_programs [[['g_mul', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_double', 'N_0', 'g_double', 'V_1']], [['g_half', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_2', 'V_1']], [['g_double', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_mul', 'N_0', 'N_2', 'g_divide', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +2025-03-24 14:06:12,243 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:06:20,828 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:06:20,828 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:06:20,829 - INFO - allennlp.training.tensorboard_writer - loss | 0.254 | 0.898 +2025-03-24 14:06:20,829 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:06:20,829 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.641 +2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:06:20,830 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:06:20,832 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.104 +2025-03-24 14:06:20,832 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.591 +2025-03-24 14:06:20,832 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:06:20,833 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.026463 +2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:39:16 +2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Epoch 31/99 +2025-03-24 14:06:20,859 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:06:21,154 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:06:21,155 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:06:21,166 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_3', 'N_2', 'N_1', 'g_double', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_sin', 'N_3', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_add', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_1', 'N_0', 'gougu_add', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_tan', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_minus', 'N_2', 'N_0', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_0', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_add', 'V_0', 'V_1']], [['g_half', 'N_1', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_divide', 'N_0', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_1', 'gougu_add', 'N_1', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'V_1']]] +2025-03-24 14:07:35,194 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:07:43,698 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:07:43,699 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:07:43,699 - INFO - allennlp.training.tensorboard_writer - loss | 0.244 | 0.855 +2025-03-24 14:07:43,700 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:07:43,700 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:07:43,701 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:07:43,701 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.650 +2025-03-24 14:07:43,701 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:07:43,702 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:07:43,702 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.119 +2025-03-24 14:07:43,702 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.604 +2025-03-24 14:07:43,703 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:07:43,703 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Epoch duration: 0:01:22.895791 +2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:37:42 +2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Epoch 32/99 +2025-03-24 14:07:43,755 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:07:44,084 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:07:44,085 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:07:44,085 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:07:44,086 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:07:44,099 - INFO - allennlp.training.trainer - Training +2025-03-24 14:09:02,203 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:09:12,076 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:09:12,077 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:09:12,078 - INFO - allennlp.training.tensorboard_writer - loss | 0.241 | 0.915 +2025-03-24 14:09:12,079 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:09:12,079 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:09:12,080 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:09:12,080 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.621 +2025-03-24 14:09:12,080 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:09:12,081 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:09:12,081 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.119 +2025-03-24 14:09:12,081 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.590 +2025-03-24 14:09:12,082 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:09:12,083 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:09:12,102 - INFO - allennlp.training.trainer - Epoch duration: 0:01:28.347464 +2025-03-24 14:09:12,103 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:36:20 +2025-03-24 14:09:12,103 - INFO - allennlp.training.trainer - Epoch 33/99 +2025-03-24 14:09:12,103 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:09:12,438 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:09:12,439 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:09:12,439 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:09:12,440 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:09:12,454 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_half', 'N_2', 'g_add', 'N_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']]] +selected_programs [[['g_mul', 'N_2', 'N_3', 'g_mul', 'V_0', 'V_0']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_cos', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0']]] +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +2025-03-24 14:10:32,716 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:10:42,650 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:10:42,651 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:10:42,652 - INFO - allennlp.training.tensorboard_writer - loss | 0.225 | 0.915 +2025-03-24 14:10:42,653 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:10:42,653 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:10:42,654 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:10:42,654 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.645 +2025-03-24 14:10:42,654 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:10:42,655 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:10:42,655 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 14:10:42,656 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.594 +2025-03-24 14:10:42,656 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:10:42,656 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Epoch duration: 0:01:30.573084 +2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:35:02 +2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Epoch 34/99 +2025-03-24 14:10:42,676 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:10:42,969 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:10:42,971 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:10:42,984 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_bili', 'N_4', 'N_5', 'N_5']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_add', 'V_0', 'N_2']], [['g_equal', 'N_1']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_mul', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_1', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_mul', 'N_0', 'N_2', 'g_add', 'V_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']]] +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_divide', 'N_0', 'N_1']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_minus', 'N_2', 'N_1', 'g_divide', 'V_0', 'N_2']], [['g_equal', 'N_0', 'g_divide', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_2', 'N_0', 'N_1', 'gougu_add', 'V_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_sin', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_0', 'N_2', 'g_add', 'V_1', 'N_2']], [['g_equal', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_0']]] +selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_tan', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_2']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 14:12:03,109 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:12:11,752 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:12:11,753 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:12:11,753 - INFO - allennlp.training.tensorboard_writer - loss | 0.208 | 0.923 +2025-03-24 14:12:11,754 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:12:11,755 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.677 +2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:12:11,756 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:12:11,757 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.114 +2025-03-24 14:12:11,757 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.592 +2025-03-24 14:12:11,757 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:12:11,758 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Epoch duration: 0:01:29.113678 +2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:33:41 +2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Epoch 35/99 +2025-03-24 14:12:11,790 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:12:12,133 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:12:12,134 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:12:12,135 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:12:12,135 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:12:12,148 - INFO - allennlp.training.trainer - Training +2025-03-24 14:13:26,122 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:13:35,886 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:13:35,887 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - loss | 0.200 | 0.956 +2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.665 +2025-03-24 14:13:35,888 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.096 +2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.618 +2025-03-24 14:13:35,889 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:13:35,892 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:13:46,234 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 14:13:51,570 - INFO - allennlp.training.trainer - Epoch duration: 0:01:39.779376 +2025-03-24 14:13:51,571 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:32:38 +2025-03-24 14:13:51,571 - INFO - allennlp.training.trainer - Epoch 36/99 +2025-03-24 14:13:51,571 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:13:51,994 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:13:51,995 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:13:52,012 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'N_2', 'N_3', 'g_divide', 'V_0', 'N_4']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]] +selected_programs [[['g_mul', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_half', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'C_2']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_2', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_2', 'gougu_add', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0', 'g_divide', 'V_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_sin', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_double', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]] +2025-03-24 14:15:12,729 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:15:22,761 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:15:22,762 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:15:22,763 - INFO - allennlp.training.tensorboard_writer - loss | 0.192 | 0.955 +2025-03-24 14:15:22,764 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:15:22,764 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:15:22,765 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:15:22,765 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.673 +2025-03-24 14:15:22,766 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:15:22,766 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:15:22,767 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.107 +2025-03-24 14:15:22,767 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.622 +2025-03-24 14:15:22,768 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:15:22,769 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:15:31,538 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 14:15:40,479 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.908280 +2025-03-24 14:15:40,480 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:31:49 +2025-03-24 14:15:40,480 - INFO - allennlp.training.trainer - Epoch 37/99 +2025-03-24 14:15:40,481 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:15:40,818 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:15:40,818 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:15:40,819 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:15:40,832 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_3', 'N_0', 'g_mul', 'N_1', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'N_1', 'N_0', 'gougu_minus', 'V_1', 'V_0', 'gougu_add', 'V_2', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_2', 'N_1', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_mul', 'N_0', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]] +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'gougu_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_add', 'N_0', 'N_0']]] +2025-03-24 14:16:58,182 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:17:08,211 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:17:08,212 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:17:08,213 - INFO - allennlp.training.tensorboard_writer - loss | 0.178 | 0.999 +2025-03-24 14:17:08,214 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:17:08,214 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:17:08,215 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:17:08,215 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.677 +2025-03-24 14:17:08,215 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:17:08,216 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:17:08,216 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.109 +2025-03-24 14:17:08,217 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.625 +2025-03-24 14:17:08,217 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:17:08,217 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:17:18,200 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 14:17:28,006 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.525251 +2025-03-24 14:17:28,006 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:30:54 +2025-03-24 14:17:28,007 - INFO - allennlp.training.trainer - Epoch 38/99 +2025-03-24 14:17:28,007 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:17:28,397 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:17:28,398 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:17:28,415 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_mul', 'N_1', 'N_2', 'g_add', 'V_0', 'N_3', 'g_divide', 'V_1', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'N_2', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2', 'g_half', 'V_1', 'gougu_minus', 'N_1', 'V_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_divide', 'N_1', 'N_2', 'g_mul', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_3', 'g_add', 'N_2', 'V_0', 'g_bili', 'V_1', 'N_1', 'N_0']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'N_2', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_minus', 'C_3', 'C_2']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']]] +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['cal_circle_area', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'C_5']], [['cal_circle_area', 'N_0', 'g_divide', 'N_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_3', 'g_add', 'V_0', 'N_3']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_3', 'N_5', 'N_9', 'g_divide', 'N_8', 'N_7', 'g_add', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']]] +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_double', 'N_1']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'N_0', 'g_double', 'V_1']]] +2025-03-24 14:18:45,828 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:18:55,098 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:18:55,098 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:18:55,099 - INFO - allennlp.training.tensorboard_writer - loss | 0.180 | 0.984 +2025-03-24 14:18:55,099 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:18:55,100 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:18:55,101 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:18:55,101 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.674 +2025-03-24 14:18:55,102 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:18:55,102 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:18:55,102 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 14:18:55,103 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.621 +2025-03-24 14:18:55,104 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:18:55,104 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:19:05,102 - INFO - allennlp.training.trainer - Epoch duration: 0:01:37.095233 +2025-03-24 14:19:05,103 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:29:41 +2025-03-24 14:19:05,103 - INFO - allennlp.training.trainer - Epoch 39/99 +2025-03-24 14:19:05,103 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:19:05,476 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:19:05,477 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:19:05,493 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['g_divide', 'N_1', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'N_2', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_divide', 'V_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_sin', 'V_1']], [['g_equal', 'N_1']], [['g_double', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]] +2025-03-24 14:20:23,480 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:20:32,160 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:20:32,161 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:20:32,162 - INFO - allennlp.training.tensorboard_writer - loss | 0.174 | 1.013 +2025-03-24 14:20:32,162 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:20:32,163 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:20:32,164 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:20:32,164 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.687 +2025-03-24 14:20:32,164 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.121 +2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.591 +2025-03-24 14:20:32,165 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:20:32,166 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Epoch duration: 0:01:27.106212 +2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:28:11 +2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Epoch 40/99 +2025-03-24 14:20:32,209 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:20:32,541 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:20:32,542 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:20:32,554 - INFO - allennlp.training.trainer - Training +2025-03-24 14:21:47,053 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:21:55,838 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:21:55,838 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:21:55,839 - INFO - allennlp.training.tensorboard_writer - loss | 0.171 | 1.032 +2025-03-24 14:21:55,839 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:21:55,840 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:21:55,840 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:21:55,841 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.682 +2025-03-24 14:21:55,841 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:21:55,842 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:21:55,842 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.116 +2025-03-24 14:21:55,843 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.617 +2025-03-24 14:21:55,843 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:21:55,844 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:22:06,234 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.024312 +2025-03-24 14:22:06,234 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:26:51 +2025-03-24 14:22:06,235 - INFO - allennlp.training.trainer - Epoch 41/99 +2025-03-24 14:22:06,235 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:22:06,618 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:22:06,619 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:22:06,619 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:22:06,619 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:22:06,620 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:22:06,620 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:22:06,620 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:22:06,634 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_2', 'g_minus', 'N_0', 'N_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_2', 'g_divide', 'N_3', 'N_4', 'g_minus', 'V_0', 'V_1']], [['g_minus', 'N_2', 'N_3', 'g_bili', 'N_1', 'N_2', 'V_0']], [['g_sin', 'N_0', 'g_divide', 'N_2', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0', 'g_add', 'N_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_sin', 'V_0', 'g_divide', 'N_0', 'V_1']], [['gougu_minus', 'N_1', 'N_2', 'g_add', 'V_0', 'N_1', 'g_mul', 'V_1', 'N_3']], [['g_minus', 'N_0', 'N_1', 'gougu_add', 'N_0', 'V_0', 'g_divide', 'N_1', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_equal', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_2', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['cal_circle_area', 'N_1', 'g_divide', 'C_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_half', 'N_0', 'cal_circle_area', 'V_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_1', 'V_2']], [['cal_circle_area', 'N_1', 'g_divide', 'N_0', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_2', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_3', 'g_half', 'V_2']], [['g_mul', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_mul', 'N_1', 'N_3']], [['g_mul', 'N_0', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_2', 'g_add', 'V_0', 'N_0', 'g_half', 'V_1', 'g_mul', 'V_2', 'N_3']]] +2025-03-24 14:23:20,969 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:23:29,579 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:23:29,579 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - loss | 0.160 | 1.019 +2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:23:29,580 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:23:29,581 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.668 +2025-03-24 14:23:29,582 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:23:29,582 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.122 +2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.608 +2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:23:29,583 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.374207 +2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:25:16 +2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Epoch 42/99 +2025-03-24 14:23:29,609 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:23:29,917 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:23:29,918 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:23:29,919 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:23:29,919 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:23:29,932 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_4', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_6']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_cos', 'V_0', 'g_mul', 'C_5', 'V_1']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]] +selected_programs [[['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_add', 'V_0', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']]] +2025-03-24 14:24:44,556 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:24:54,677 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:24:54,677 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:24:54,677 - INFO - allennlp.training.tensorboard_writer - loss | 0.152 | 1.039 +2025-03-24 14:24:54,679 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:24:54,679 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:24:54,679 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:24:54,680 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.684 +2025-03-24 14:24:54,680 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:24:54,680 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:24:54,681 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.109 +2025-03-24 14:24:54,682 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.625 +2025-03-24 14:24:54,682 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:24:54,683 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:25:05,497 - INFO - allennlp.training.trainer - Epoch duration: 0:01:35.888198 +2025-03-24 14:25:05,498 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:23:58 +2025-03-24 14:25:05,498 - INFO - allennlp.training.trainer - Epoch 43/99 +2025-03-24 14:25:05,499 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:25:05,854 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:25:05,855 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:25:05,869 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]] +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_0', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'N_1', 'g_minus', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'V_0', 'V_1', 'g_double', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']]] +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1', 'g_divide', 'V_2', 'N_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'N_3', 'g_divide', 'V_1', 'V_2']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_add', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'N_2', 'V_0', 'N_1']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_mul', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['gougu_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['gougu_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_0', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_double', 'N_0']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0']]] +selected_programs [[['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0', 'g_minus', 'V_1', 'N_2', 'g_minus', 'V_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +2025-03-24 14:26:20,753 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:26:29,499 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:26:29,500 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:26:29,500 - INFO - allennlp.training.tensorboard_writer - loss | 0.141 | 1.065 +2025-03-24 14:26:29,501 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:26:29,502 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:26:29,502 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:26:29,503 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.697 +2025-03-24 14:26:29,503 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:26:29,503 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:26:29,504 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.126 +2025-03-24 14:26:29,505 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.608 +2025-03-24 14:26:29,505 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:26:29,506 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Epoch duration: 0:01:24.039338 +2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:22:24 +2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Epoch 44/99 +2025-03-24 14:26:29,538 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:26:29,833 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:26:29,834 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:26:29,834 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:26:29,834 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:26:29,835 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:26:29,835 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:26:29,835 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:26:29,855 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_2', 'g_equal', 'N_1']], [['g_cos', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]] +selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_2', 'g_divide', 'N_1', 'V_0', 'g_add', 'N_1', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_1', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_4', 'N_3', 'N_5', 'g_add', 'V_0', 'N_5', 'g_minus', 'N_5', 'V_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_2', 'V_0']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_mul', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0', 'g_half', 'N_1', 'g_minus', 'V_2', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_add', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_2']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_sin', 'N_2', 'g_divide', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_1', 'g_add', 'V_2', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']]] +2025-03-24 14:27:44,248 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:27:52,963 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:27:52,963 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:27:52,963 - INFO - allennlp.training.tensorboard_writer - loss | 0.120 | 1.062 +2025-03-24 14:27:52,964 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:27:52,965 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:27:52,965 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:27:52,965 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.705 +2025-03-24 14:27:52,966 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:27:52,966 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:27:52,967 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 14:27:52,967 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.625 +2025-03-24 14:27:52,968 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:27:52,968 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:28:03,377 - INFO - allennlp.training.trainer - Epoch duration: 0:01:33.839376 +2025-03-24 14:28:03,378 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:21:02 +2025-03-24 14:28:03,378 - INFO - allennlp.training.trainer - Epoch 45/99 +2025-03-24 14:28:03,379 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:28:03,741 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:28:03,742 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:28:03,743 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:28:03,757 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_mul', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_mul', 'N_1', 'N_2', 'g_add', 'V_0', 'N_3', 'g_divide', 'V_1', 'N_1']], [['g_add', 'N_0', 'N_2', 'g_bili', 'V_0', 'N_2', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'V_0', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_2', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_minus', 'N_0', 'N_1', 'g_add', 'V_0', 'N_1']], [['g_minus', 'N_0', 'N_2', 'g_minus', 'N_1', 'N_2', 'g_bili', 'N_3', 'V_0', 'V_1', 'g_minus', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_equal', 'N_0']], [['g_divide', 'N_1', 'N_2', 'g_mul', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_3', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_3']], [['g_minus', 'C_3', 'N_0']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_2', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']]] +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'cal_circle_area', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_double', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_0']], [['g_equal', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'N_2', 'N_0', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_double', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_divide', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']]] +2025-03-24 14:29:18,720 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:29:27,412 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:29:27,413 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:29:27,414 - INFO - allennlp.training.tensorboard_writer - loss | 0.096 | 1.112 +2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:29:27,415 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.705 +2025-03-24 14:29:27,416 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:29:27,416 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:29:27,417 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 14:29:27,417 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.613 +2025-03-24 14:29:27,417 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:29:27,418 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:29:37,548 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.169543 +2025-03-24 14:29:37,549 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:19:41 +2025-03-24 14:29:37,549 - INFO - allennlp.training.trainer - Epoch 46/99 +2025-03-24 14:29:37,550 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:29:37,898 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:29:37,899 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:29:37,914 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'C_2']], [['g_half', 'N_0', 'g_add', 'V_0', 'N_1', 'g_minus', 'N_1', 'V_1']], [['gougu_add', 'N_0', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_0', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_2', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_0', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_cos', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']]] +selected_programs [[['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_2', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_add', 'N_4', 'N_5', 'g_bili', 'N_3', 'N_5', 'V_0']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'N_2', 'V_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_mul', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_divide', 'C_4', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'N_0', 'N_2', 'g_add', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_mul', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_0']]] +selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_equal', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_half', 'V_2']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_double', 'N_2']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_add', 'V_2', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_2', 'N_0', 'g_add', 'V_0', 'N_1', 'g_add', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'C_5']], [['g_add', 'N_0', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_1', 'g_add', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_2', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_double', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]] +2025-03-24 14:30:52,488 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:31:01,187 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:31:01,188 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:31:01,188 - INFO - allennlp.training.tensorboard_writer - loss | 0.087 | 1.121 +2025-03-24 14:31:01,189 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:31:01,190 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:31:01,190 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.698 +2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:31:01,191 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 14:31:01,192 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.642 +2025-03-24 14:31:01,193 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:31:01,193 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:31:11,411 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 14:31:16,545 - INFO - allennlp.training.trainer - Epoch duration: 0:01:38.996021 +2025-03-24 14:31:16,546 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:18:24 +2025-03-24 14:31:16,546 - INFO - allennlp.training.trainer - Epoch 47/99 +2025-03-24 14:31:16,546 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:31:16,906 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:31:16,907 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:31:16,921 - INFO - allennlp.training.trainer - Training +2025-03-24 14:32:31,775 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:32:40,440 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:32:40,441 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:32:40,441 - INFO - allennlp.training.tensorboard_writer - loss | 0.082 | 1.134 +2025-03-24 14:32:40,443 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:32:40,443 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:32:40,444 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:32:40,444 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.693 +2025-03-24 14:32:40,444 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.104 +2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.635 +2025-03-24 14:32:40,445 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:32:40,446 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:32:52,612 - INFO - allennlp.training.trainer - Epoch duration: 0:01:36.065327 +2025-03-24 14:32:52,612 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:17:03 +2025-03-24 14:32:52,612 - INFO - allennlp.training.trainer - Epoch 48/99 +2025-03-24 14:32:52,613 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:32:52,982 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:32:52,983 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:32:52,997 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_bili', 'N_2', 'N_3', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_1', 'N_2']], [['g_divide', 'N_2', 'N_1']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['g_double', 'N_0']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['gougu_minus', 'N_2', 'N_1', 'g_bili', 'V_0', 'N_1', 'N_3']], [['gougu_add', 'N_1', 'N_2', 'g_divide', 'N_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_tan', 'V_0']], [['g_divide', 'N_1', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_3', 'V_0', 'g_divide', 'V_1', 'N_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_sin', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_0']]] +2025-03-24 14:34:07,627 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:34:16,323 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:34:16,324 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:34:16,325 - INFO - allennlp.training.tensorboard_writer - loss | 0.078 | 1.159 +2025-03-24 14:34:16,326 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:34:16,326 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:34:16,327 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:34:16,327 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.707 +2025-03-24 14:34:16,328 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:34:16,328 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:34:16,328 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.123 +2025-03-24 14:34:16,329 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.631 +2025-03-24 14:34:16,329 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:34:16,330 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:34:26,656 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.043945 +2025-03-24 14:34:26,657 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:15:39 +2025-03-24 14:34:26,657 - INFO - allennlp.training.trainer - Epoch 49/99 +2025-03-24 14:34:26,657 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:34:27,059 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:34:27,060 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:34:27,061 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:34:27,075 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_equal', 'N_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'C_2', 'g_half', 'N_0', 'g_half', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_half', 'V_0', 'g_minus', 'V_1', 'V_2']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2', 'gougu_add', 'V_0', 'N_0']]] +2025-03-24 14:35:48,312 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:35:57,120 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:35:57,120 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:35:57,121 - INFO - allennlp.training.tensorboard_writer - loss | 0.081 | 1.165 +2025-03-24 14:35:57,122 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:35:57,122 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:35:57,123 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:35:57,123 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.696 +2025-03-24 14:35:57,123 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:35:57,124 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:35:57,124 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.113 +2025-03-24 14:35:57,124 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.633 +2025-03-24 14:35:57,125 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:35:57,125 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:36:06,907 - INFO - allennlp.training.trainer - Epoch duration: 0:01:40.249765 +2025-03-24 14:36:06,908 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:14:22 +2025-03-24 14:36:06,908 - INFO - allennlp.training.trainer - Epoch 50/99 +2025-03-24 14:36:06,908 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:36:07,239 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:36:07,240 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:36:07,241 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:36:07,254 - INFO - allennlp.training.trainer - Training +2025-03-24 14:37:25,430 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:37:35,565 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:37:35,566 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:37:35,567 - INFO - allennlp.training.tensorboard_writer - loss | 0.074 | 1.180 +2025-03-24 14:37:35,568 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:37:35,568 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:37:35,569 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:37:35,569 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.704 +2025-03-24 14:37:35,569 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:37:35,570 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:37:35,570 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.111 +2025-03-24 14:37:35,571 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.623 +2025-03-24 14:37:35,571 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:37:35,571 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:37:45,879 - INFO - allennlp.training.trainer - Epoch duration: 0:01:38.970530 +2025-03-24 14:37:45,879 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:13:02 +2025-03-24 14:37:45,879 - INFO - allennlp.training.trainer - Epoch 51/99 +2025-03-24 14:37:45,880 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:37:46,287 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:37:46,287 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:37:46,288 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:37:46,301 - INFO - allennlp.training.trainer - Training +2025-03-24 14:39:06,059 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:39:14,788 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:39:14,789 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:39:14,789 - INFO - allennlp.training.tensorboard_writer - loss | 0.068 | 1.190 +2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:39:14,791 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.700 +2025-03-24 14:39:14,792 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:39:14,793 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:39:14,794 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 14:39:14,795 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.634 +2025-03-24 14:39:14,795 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:39:14,796 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:39:25,681 - INFO - allennlp.training.trainer - Epoch duration: 0:01:39.801897 +2025-03-24 14:39:25,682 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:11:42 +2025-03-24 14:39:25,682 - INFO - allennlp.training.trainer - Epoch 52/99 +2025-03-24 14:39:25,683 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:39:26,036 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:39:26,037 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:39:26,049 - INFO - allennlp.training.trainer - Training +2025-03-24 14:40:40,144 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:40:48,831 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:40:48,832 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:40:48,833 - INFO - allennlp.training.tensorboard_writer - loss | 0.066 | 1.202 +2025-03-24 14:40:48,833 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:40:48,834 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:40:48,834 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:40:48,835 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.693 +2025-03-24 14:40:48,835 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:40:48,835 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:40:48,836 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.121 +2025-03-24 14:40:48,836 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.606 +2025-03-24 14:40:48,837 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:40:48,837 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:40:48,908 - INFO - allennlp.training.trainer - Epoch duration: 0:01:23.226120 +2025-03-24 14:40:48,909 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:10:07 +2025-03-24 14:40:48,909 - INFO - allennlp.training.trainer - Epoch 53/99 +2025-03-24 14:40:48,909 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:40:49,214 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:40:49,215 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:40:49,216 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:40:49,229 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1', 'g_double', 'V_2']], [['g_half', 'N_0']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_half', 'V_1', 'g_add', 'V_0', 'V_2']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'N_0', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'gougu_minus', 'N_2', 'V_1', 'g_divide', 'V_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +selected_programs [[['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1', 'g_minus', 'N_0', 'V_2']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']]] +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0', 'g_minus', 'V_2', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1', 'gougu_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +selected_programs [[['g_double', 'N_0', 'g_sin', 'V_0', 'g_divide', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_add', 'V_1', 'N_0', 'g_minus', 'V_2', 'V_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]] +2025-03-24 14:42:04,098 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:42:12,867 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:42:12,868 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:42:12,869 - INFO - allennlp.training.tensorboard_writer - loss | 0.056 | 1.197 +2025-03-24 14:42:12,870 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:42:12,870 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:42:12,871 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:42:12,871 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.713 +2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.123 +2025-03-24 14:42:12,872 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.625 +2025-03-24 14:42:12,873 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:42:12,874 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:42:24,080 - INFO - allennlp.training.trainer - Epoch duration: 0:01:35.171115 +2025-03-24 14:42:24,081 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:08:42 +2025-03-24 14:42:24,081 - INFO - allennlp.training.trainer - Epoch 54/99 +2025-03-24 14:42:24,081 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:42:24,435 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:42:24,436 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:42:24,436 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:42:24,436 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:42:24,437 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:42:24,437 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:42:24,437 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:42:24,449 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_equal', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_half', 'V_2']], [['gougu_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['gougu_minus', 'N_2', 'N_1', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_add', 'N_0', 'N_0', 'g_divide', 'V_1', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_2', 'g_half', 'V_1', 'gougu_minus', 'V_2', 'N_3']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_minus', 'N_0', 'V_0', 'gougu_add', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]] +2025-03-24 14:43:38,822 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:43:47,613 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:43:47,614 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:43:47,615 - INFO - allennlp.training.tensorboard_writer - loss | 0.049 | 1.226 +2025-03-24 14:43:47,616 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:43:47,616 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:43:47,616 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:43:47,617 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.707 +2025-03-24 14:43:47,617 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:43:47,618 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:43:47,618 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 14:43:47,618 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.627 +2025-03-24 14:43:47,619 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:43:47,619 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:43:58,586 - INFO - allennlp.training.trainer - Epoch duration: 0:01:34.505436 +2025-03-24 14:43:58,587 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:07:16 +2025-03-24 14:43:58,587 - INFO - allennlp.training.trainer - Epoch 55/99 +2025-03-24 14:43:58,587 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:43:58,979 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:43:58,980 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:43:58,980 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:43:58,980 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:43:58,981 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:43:58,981 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:43:58,981 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:43:58,995 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_sin', 'N_0', 'g_divide', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_add', 'N_1', 'N_2', 'g_add', 'V_1', 'V_0', 'g_half', 'V_2']], [['g_divide', 'N_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_minus', 'N_2', 'N_1', 'g_add', 'V_0', 'N_0', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'N_0', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_add', 'N_0', 'N_0', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_half', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_0', 'V_0', 'g_half', 'V_1']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]] +selected_programs [[['g_double', 'N_3', 'gougu_minus', 'V_0', 'N_0', 'g_mul', 'V_1', 'N_3']], [['g_half', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_1', 'g_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']]] +2025-03-24 14:45:13,195 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:45:21,997 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:45:21,997 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:45:21,998 - INFO - allennlp.training.tensorboard_writer - loss | 0.048 | 1.226 +2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:45:21,999 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.709 +2025-03-24 14:45:22,000 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.620 +2025-03-24 14:45:22,001 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:45:22,002 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:45:32,521 - INFO - allennlp.training.trainer - Epoch duration: 0:01:33.933025 +2025-03-24 14:45:32,522 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:05:50 +2025-03-24 14:45:32,523 - INFO - allennlp.training.trainer - Epoch 56/99 +2025-03-24 14:45:32,523 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:45:32,875 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:45:32,876 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:45:32,877 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:45:32,890 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'gougu_add', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 14:46:46,753 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:46:55,489 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:46:55,490 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:46:55,491 - INFO - allennlp.training.tensorboard_writer - loss | 0.046 | 1.248 +2025-03-24 14:46:55,491 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:46:55,492 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.708 +2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:46:55,493 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.109 +2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.635 +2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:46:55,494 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:47:18,596 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.073355 +2025-03-24 14:47:18,597 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:04:32 +2025-03-24 14:47:18,597 - INFO - allennlp.training.trainer - Epoch 57/99 +2025-03-24 14:47:18,597 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:47:18,961 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:47:18,962 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:47:18,963 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:47:18,976 - INFO - allennlp.training.trainer - Training +2025-03-24 14:48:35,195 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:48:43,914 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:48:43,915 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:48:43,915 - INFO - allennlp.training.tensorboard_writer - loss | 0.043 | 1.246 +2025-03-24 14:48:43,916 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:48:43,917 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:48:43,917 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:48:43,917 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.714 +2025-03-24 14:48:43,918 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:48:43,918 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:48:43,918 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.124 +2025-03-24 14:48:43,919 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.621 +2025-03-24 14:48:43,919 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:48:43,920 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:49:08,186 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.588647 +2025-03-24 14:49:08,187 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:03:16 +2025-03-24 14:49:08,187 - INFO - allennlp.training.trainer - Epoch 58/99 +2025-03-24 14:49:08,187 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:49:08,587 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:49:08,588 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:49:08,589 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:49:08,604 - INFO - allennlp.training.trainer - Training +2025-03-24 14:50:24,124 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:50:32,978 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:50:32,979 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - loss | 0.043 | 1.273 +2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.723 +2025-03-24 14:50:32,980 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:50:32,982 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:50:32,982 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.126 +2025-03-24 14:50:32,983 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.626 +2025-03-24 14:50:32,983 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:50:32,983 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:50:56,970 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.783204 +2025-03-24 14:50:56,971 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:01:59 +2025-03-24 14:50:56,971 - INFO - allennlp.training.trainer - Epoch 59/99 +2025-03-24 14:50:56,971 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:50:57,318 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:50:57,319 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:50:57,319 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:50:57,319 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:50:57,320 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:50:57,320 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:50:57,320 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:50:57,334 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_minus', 'N_0', 'V_0', 'gougu_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]] +2025-03-24 14:52:14,797 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:52:23,751 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:52:23,751 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - loss | 0.038 | 1.264 +2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 14:52:23,752 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.126 +2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.627 +2025-03-24 14:52:23,753 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:52:23,755 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:52:48,227 - INFO - allennlp.training.trainer - Epoch duration: 0:01:51.256274 +2025-03-24 14:52:48,228 - INFO - allennlp.training.trainer - Estimated training time remaining: 1:00:42 +2025-03-24 14:52:48,228 - INFO - allennlp.training.trainer - Epoch 60/99 +2025-03-24 14:52:48,229 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:52:48,579 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:52:48,580 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:52:48,580 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17660 +2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:52:48,581 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:52:48,595 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_add', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']]] +selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]] +2025-03-24 14:54:04,648 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:54:13,595 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:54:13,595 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:54:13,597 - INFO - allennlp.training.tensorboard_writer - loss | 0.036 | 1.268 +2025-03-24 14:54:13,598 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:54:13,598 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17660.000 | N/A +2025-03-24 14:54:13,598 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:54:13,599 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.728 +2025-03-24 14:54:13,600 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:54:13,600 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:54:13,601 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.123 +2025-03-24 14:54:13,601 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.635 +2025-03-24 14:54:13,602 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:54:13,603 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:54:38,334 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.105851 +2025-03-24 14:54:38,335 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:59:23 +2025-03-24 14:54:38,335 - INFO - allennlp.training.trainer - Epoch 61/99 +2025-03-24 14:54:38,335 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:54:38,691 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:54:38,692 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:54:38,692 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:54:38,693 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:54:38,706 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_2', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'C_2']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'N_1', 'V_1']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']]] +2025-03-24 14:55:54,064 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:56:02,928 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:56:02,929 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:56:02,930 - INFO - allennlp.training.tensorboard_writer - loss | 0.035 | 1.285 +2025-03-24 14:56:02,930 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:56:02,930 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.720 +2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:56:02,932 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 14:56:02,933 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.640 +2025-03-24 14:56:02,933 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:56:02,933 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:56:27,127 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.792393 +2025-03-24 14:56:27,128 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:58:02 +2025-03-24 14:56:27,128 - INFO - allennlp.training.trainer - Epoch 62/99 +2025-03-24 14:56:27,128 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:56:27,474 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 14:56:27,475 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:56:27,476 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:56:27,488 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'N_0', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0', 'cal_circle_area', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'C_5']], [['cal_circle_area', 'N_0', 'g_divide', 'N_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_3']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_2', 'N_0']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_3', 'N_5', 'g_minus', 'V_0', 'N_5', 'g_minus', 'N_5', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_minus', 'V_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_minus', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]] +2025-03-24 14:57:42,461 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:57:51,358 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:57:51,358 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - loss | 0.034 | 1.277 +2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 14:57:51,359 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:57:51,360 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:57:51,360 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.122 +2025-03-24 14:57:51,360 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.617 +2025-03-24 14:57:51,361 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:57:51,363 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.222078 +2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:56:41 +2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Epoch 63/99 +2025-03-24 14:58:16,350 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 14:58:16,692 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 14:58:16,692 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 14:58:16,693 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 14:58:16,704 - INFO - allennlp.training.trainer - Training +2025-03-24 14:59:32,239 - INFO - allennlp.training.trainer - Validating +2025-03-24 14:59:41,114 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 14:59:41,114 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 14:59:41,114 - INFO - allennlp.training.tensorboard_writer - loss | 0.032 | 1.291 +2025-03-24 14:59:41,115 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 14:59:41,115 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 14:59:41,116 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 14:59:41,116 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.715 +2025-03-24 14:59:41,117 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 14:59:41,118 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 14:59:41,118 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.126 +2025-03-24 14:59:41,118 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.633 +2025-03-24 14:59:41,119 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 14:59:41,120 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:00:06,036 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.685559 +2025-03-24 15:00:06,037 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:55:19 +2025-03-24 15:00:06,037 - INFO - allennlp.training.trainer - Epoch 64/99 +2025-03-24 15:00:06,037 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:00:06,443 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:00:06,444 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:00:06,444 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:00:06,445 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:00:06,460 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']]] +2025-03-24 15:01:22,311 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:01:31,000 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:01:31,002 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:01:31,002 - INFO - allennlp.training.tensorboard_writer - loss | 0.032 | 1.293 +2025-03-24 15:01:31,002 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:01:31,004 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:01:31,005 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:01:31,006 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.706 +2025-03-24 15:01:31,006 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.104 +2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.645 +2025-03-24 15:01:31,007 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:01:31,008 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:01:46,580 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 15:02:10,923 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 15:02:19,504 - INFO - allennlp.training.trainer - Epoch duration: 0:02:13.467008 +2025-03-24 15:02:19,505 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:54:09 +2025-03-24 15:02:19,505 - INFO - allennlp.training.trainer - Epoch 65/99 +2025-03-24 15:02:19,505 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:02:19,927 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:02:19,928 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:02:19,928 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:02:19,929 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:02:19,947 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'N_1', 'g_add', 'V_0', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'N_0', 'C_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'C_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]] +selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0', 'g_add', 'V_0', 'V_1']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]] +2025-03-24 15:03:36,233 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:03:45,125 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:03:45,125 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - loss | 0.030 | 1.304 +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.723 +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:03:45,126 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:03:45,127 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.119 +2025-03-24 15:03:45,128 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.650 +2025-03-24 15:03:45,128 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:03:45,130 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:04:00,855 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 15:04:23,785 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Epoch duration: 0:02:12.934145 +2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:52:57 +2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Epoch 66/99 +2025-03-24 15:04:32,439 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:04:32,791 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:04:32,792 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:04:32,793 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:04:32,808 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]] +2025-03-24 15:05:47,755 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:05:57,265 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - loss | 0.029 | 1.318 +2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:05:57,266 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.713 +2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.122 +2025-03-24 15:05:57,267 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.636 +2025-03-24 15:05:57,268 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:05:57,270 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:06:22,308 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.868413 +2025-03-24 15:06:22,309 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:51:31 +2025-03-24 15:06:22,309 - INFO - allennlp.training.trainer - Epoch 67/99 +2025-03-24 15:06:22,309 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:06:22,675 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:06:22,676 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:06:22,676 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:06:22,676 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:06:22,677 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:06:22,677 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:06:22,677 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:06:22,692 - INFO - allennlp.training.trainer - Training +2025-03-24 15:07:36,657 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:07:45,422 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:07:45,423 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:07:45,424 - INFO - allennlp.training.tensorboard_writer - loss | 0.029 | 1.317 +2025-03-24 15:07:45,425 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:07:45,426 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:07:45,426 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:07:45,426 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.722 +2025-03-24 15:07:45,427 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:07:45,427 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:07:45,428 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 15:07:45,428 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.634 +2025-03-24 15:07:45,429 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:07:45,429 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:08:22,297 - INFO - allennlp.training.trainer - Epoch duration: 0:01:59.988171 +2025-03-24 15:08:22,298 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:50:10 +2025-03-24 15:08:22,298 - INFO - allennlp.training.trainer - Epoch 68/99 +2025-03-24 15:08:22,299 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:08:22,619 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:08:22,620 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:08:22,620 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:08:22,620 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:08:22,621 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:08:22,621 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:08:22,621 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:08:22,634 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_add', 'N_0', 'N_1', 'g_divide', 'N_1', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'N_0', 'V_0']], [['g_bili', 'N_3', 'N_1', 'N_2', 'gougu_add', 'V_0', 'N_0']], [['g_sin', 'C_0', 'g_mul', 'V_0', 'N_0', 'g_add', 'V_1', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_add', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_1', 'N_0', 'gougu_add', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_tan', 'N_1', 'g_divide', 'N_0', 'V_0']], [['g_sin', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_add', 'N_0', 'V_0', 'g_bili', 'N_2', 'N_0', 'V_1', 'g_add', 'V_2', 'N_2']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_add', 'V_0', 'V_1']], [['g_minus', 'C_2', 'N_1', 'g_minus', 'N_2', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['gougu_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_divide', 'N_0', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'N_2', 'g_half', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_double', 'N_1', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_tan', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_add', 'V_1', 'V_0']]] +2025-03-24 15:09:42,401 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:09:52,540 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:09:52,541 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:09:52,542 - INFO - allennlp.training.tensorboard_writer - loss | 0.029 | 1.336 +2025-03-24 15:09:52,543 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:09:52,543 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:09:52,544 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:09:52,544 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.716 +2025-03-24 15:09:52,544 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:09:52,545 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:09:52,545 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.123 +2025-03-24 15:09:52,546 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.631 +2025-03-24 15:09:52,546 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:09:52,547 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:10:17,217 - INFO - allennlp.training.trainer - Epoch duration: 0:01:54.918312 +2025-03-24 15:10:17,218 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:48:45 +2025-03-24 15:10:17,218 - INFO - allennlp.training.trainer - Epoch 69/99 +2025-03-24 15:10:17,218 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:10:17,591 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:10:17,592 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:10:17,593 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:10:17,606 - INFO - allennlp.training.trainer - Training +2025-03-24 15:11:32,868 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:11:41,598 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:11:41,598 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:11:41,599 - INFO - allennlp.training.tensorboard_writer - loss | 0.028 | 1.330 +2025-03-24 15:11:41,599 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:11:41,599 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:11:41,600 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:11:41,600 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.723 +2025-03-24 15:11:41,601 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:11:41,601 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:11:41,602 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.114 +2025-03-24 15:11:41,602 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.640 +2025-03-24 15:11:41,602 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:11:41,603 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:12:08,089 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.870937 +2025-03-24 15:12:08,090 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:47:18 +2025-03-24 15:12:08,090 - INFO - allennlp.training.trainer - Epoch 70/99 +2025-03-24 15:12:08,090 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:12:08,509 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:12:08,510 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:12:08,510 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:12:08,511 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:12:08,524 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_circle_area', 'N_0']], [['g_double', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'g_double', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1']], [['g_bili', 'C_5', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'V_0', 'N_1']], [['g_cos', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_half', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'cal_circle_area', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_half', 'N_0', 'cal_cone', 'V_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0', 'g_minus', 'N_2', 'V_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_2', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']]] +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_divide', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['cal_circle_area', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0', 'cal_circle_area', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_double', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'C_5']], [['cal_circle_area', 'N_0', 'g_divide', 'N_1', 'C_4', 'g_mul', 'V_0', 'V_1']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_bili', 'N_3', 'N_2', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2']], [['g_bili', 'N_4', 'N_5', 'N_5', 'g_add', 'V_0', 'N_5']], [['g_bili', 'N_1', 'N_0', 'N_1']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_minus', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']]] +2025-03-24 15:13:22,930 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:13:31,619 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:13:31,620 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:13:31,620 - INFO - allennlp.training.tensorboard_writer - loss | 0.026 | 1.332 +2025-03-24 15:13:31,621 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:13:31,621 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.717 +2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:13:31,622 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.109 +2025-03-24 15:13:31,623 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.644 +2025-03-24 15:13:31,623 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:13:31,624 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:13:57,735 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.645165 +2025-03-24 15:13:57,736 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:45:50 +2025-03-24 15:13:57,736 - INFO - allennlp.training.trainer - Epoch 71/99 +2025-03-24 15:13:57,736 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:13:58,190 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:13:58,191 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:13:58,192 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:13:58,204 - INFO - allennlp.training.trainer - Training +2025-03-24 15:15:12,046 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:15:20,780 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:15:20,780 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:15:20,781 - INFO - allennlp.training.tensorboard_writer - loss | 0.026 | 1.344 +2025-03-24 15:15:20,781 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:15:20,782 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:15:20,782 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.712 +2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:15:20,783 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 15:15:20,784 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.642 +2025-03-24 15:15:20,784 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:15:20,785 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:15:44,796 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.059443 +2025-03-24 15:15:44,796 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:44:20 +2025-03-24 15:15:44,797 - INFO - allennlp.training.trainer - Epoch 72/99 +2025-03-24 15:15:44,797 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:15:45,190 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:15:45,191 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:15:45,204 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_equal', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'N_2']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_2', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_double', 'N_2', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_add', 'N_0', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_half', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]] +2025-03-24 15:16:59,171 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:17:07,922 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:17:07,923 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:17:07,923 - INFO - allennlp.training.tensorboard_writer - loss | 0.024 | 1.354 +2025-03-24 15:17:07,924 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:17:07,925 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:17:07,925 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:17:07,925 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 15:17:07,926 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:17:07,926 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:17:07,927 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.121 +2025-03-24 15:17:07,927 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.638 +2025-03-24 15:17:07,928 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:17:07,928 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:17:32,029 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.231922 +2025-03-24 15:17:32,030 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:42:49 +2025-03-24 15:17:32,031 - INFO - allennlp.training.trainer - Epoch 73/99 +2025-03-24 15:17:32,031 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:17:32,393 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:17:32,394 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:17:32,410 - INFO - allennlp.training.trainer - Training +2025-03-24 15:18:51,729 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:19:01,899 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:19:01,900 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:19:01,901 - INFO - allennlp.training.tensorboard_writer - loss | 0.023 | 1.360 +2025-03-24 15:19:01,901 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:19:01,902 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:19:01,903 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:19:01,903 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.726 +2025-03-24 15:19:01,903 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:19:01,904 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:19:01,904 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 15:19:01,905 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.650 +2025-03-24 15:19:01,905 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:19:01,905 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:19:27,258 - INFO - allennlp.training.trainer - Epoch duration: 0:01:55.226941 +2025-03-24 15:19:27,259 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:41:21 +2025-03-24 15:19:27,259 - INFO - allennlp.training.trainer - Epoch 74/99 +2025-03-24 15:19:27,259 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:19:27,601 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:19:27,602 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:19:27,616 - INFO - allennlp.training.trainer - Training +2025-03-24 15:20:45,470 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:20:54,150 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:20:54,150 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:20:54,150 - INFO - allennlp.training.tensorboard_writer - loss | 0.023 | 1.359 +2025-03-24 15:20:54,151 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:20:54,151 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:20:54,152 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:20:54,153 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 15:20:54,153 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:20:54,153 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:20:54,154 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 15:20:54,154 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.644 +2025-03-24 15:20:54,155 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:20:54,156 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:21:18,356 - INFO - allennlp.training.trainer - Epoch duration: 0:01:51.096131 +2025-03-24 15:21:18,357 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:39:51 +2025-03-24 15:21:18,357 - INFO - allennlp.training.trainer - Epoch 75/99 +2025-03-24 15:21:18,358 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:21:18,729 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:21:18,730 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:21:18,731 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:21:18,747 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_mul', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'C_2']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_half', 'N_0', 'g_half', 'V_0', 'gougu_minus', 'V_0', 'V_1', 'g_double', 'V_2']], [['g_double', 'N_0', 'g_add', 'V_0', 'N_0', 'g_add', 'V_1', 'N_0', 'g_add', 'N_0', 'V_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'N_0', 'g_add', 'V_0', 'C_2']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_double', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['g_double', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_bili', 'N_1', 'V_0', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'N_0', 'N_1', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']]] +selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_1', 'N_0', 'g_minus', 'V_2', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]] +2025-03-24 15:22:33,013 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:22:41,723 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:22:41,724 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:22:41,724 - INFO - allennlp.training.tensorboard_writer - loss | 0.023 | 1.360 +2025-03-24 15:22:41,726 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:22:41,726 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:22:41,727 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.130 +2025-03-24 15:22:41,728 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.633 +2025-03-24 15:22:41,728 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:22:41,729 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:23:06,366 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.008678 +2025-03-24 15:23:06,367 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:38:19 +2025-03-24 15:23:06,367 - INFO - allennlp.training.trainer - Epoch 76/99 +2025-03-24 15:23:06,367 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:23:06,767 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:23:06,768 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:23:06,768 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:23:06,769 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:23:06,782 - INFO - allennlp.training.trainer - Training +2025-03-24 15:24:21,135 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:24:29,850 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:24:29,850 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:24:29,851 - INFO - allennlp.training.tensorboard_writer - loss | 0.022 | 1.364 +2025-03-24 15:24:29,852 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:24:29,853 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:24:29,853 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:24:29,854 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.120 +2025-03-24 15:24:29,855 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.650 +2025-03-24 15:24:29,855 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:24:29,856 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.252715 +2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:36:47 +2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Epoch 77/99 +2025-03-24 15:24:53,620 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:24:53,982 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:24:53,983 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:24:53,984 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:24:53,997 - INFO - allennlp.training.trainer - Training +2025-03-24 15:26:09,366 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:26:19,489 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:26:19,490 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:26:19,491 - INFO - allennlp.training.tensorboard_writer - loss | 0.020 | 1.365 +2025-03-24 15:26:19,492 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:26:19,492 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:26:19,493 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:26:19,493 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.721 +2025-03-24 15:26:19,493 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:26:19,494 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:26:19,494 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.120 +2025-03-24 15:26:19,494 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.648 +2025-03-24 15:26:19,495 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:26:19,495 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:26:43,275 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.654991 +2025-03-24 15:26:43,276 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:35:14 +2025-03-24 15:26:43,276 - INFO - allennlp.training.trainer - Epoch 78/99 +2025-03-24 15:26:43,276 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:26:43,663 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:26:43,664 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:26:43,665 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:26:43,677 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_0', 'g_add', 'N_1', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_tan', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_1', 'g_minus', 'V_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]] +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_sin', 'V_0', 'g_mul', 'N_2', 'V_1']], [['g_minus', 'N_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_1', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_bili', 'V_1', 'N_2', 'N_1', 'gougu_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]] +2025-03-24 15:28:00,017 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:28:08,738 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:28:08,739 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:28:08,740 - INFO - allennlp.training.tensorboard_writer - loss | 0.020 | 1.382 +2025-03-24 15:28:08,741 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:28:08,741 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:28:08,742 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:28:08,742 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 15:28:08,743 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:28:08,743 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:28:08,743 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.120 +2025-03-24 15:28:08,744 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.639 +2025-03-24 15:28:08,744 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:28:08,745 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:28:32,177 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.900667 +2025-03-24 15:28:32,177 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:33:42 +2025-03-24 15:28:32,177 - INFO - allennlp.training.trainer - Epoch 79/99 +2025-03-24 15:28:32,178 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:28:32,550 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:28:32,551 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:28:32,568 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'gougu_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_1']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]] +selected_programs [[['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_minus', 'V_2', 'N_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_1', 'g_add', 'N_2', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'C_3', 'N_1']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1']], [['g_add', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_equal', 'N_1']], [['g_half', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0']]] +2025-03-24 15:29:47,668 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:29:56,456 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - loss | 0.021 | 1.376 +2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:29:56,457 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:29:56,458 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:29:56,458 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:29:56,459 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:29:56,459 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:29:56,460 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.113 +2025-03-24 15:29:56,460 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.650 +2025-03-24 15:29:56,461 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:29:56,461 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:30:22,319 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.141526 +2025-03-24 15:30:22,320 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:32:09 +2025-03-24 15:30:22,320 - INFO - allennlp.training.trainer - Epoch 80/99 +2025-03-24 15:30:22,320 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:30:22,721 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:30:22,722 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:30:22,723 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:30:22,737 - INFO - allennlp.training.trainer - Training +2025-03-24 15:31:42,405 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:31:52,540 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:31:52,540 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:31:52,541 - INFO - allennlp.training.tensorboard_writer - loss | 0.020 | 1.382 +2025-03-24 15:31:52,541 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:31:52,542 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:31:52,543 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:31:52,543 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.725 +2025-03-24 15:31:52,543 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:31:52,544 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:31:52,544 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.111 +2025-03-24 15:31:52,545 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.651 +2025-03-24 15:31:52,545 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:31:52,546 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:32:03,999 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 15:32:30,588 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'test//best.th'. +2025-03-24 15:32:35,555 - INFO - allennlp.training.trainer - Epoch duration: 0:02:13.235409 +2025-03-24 15:32:35,556 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:30:41 +2025-03-24 15:32:35,556 - INFO - allennlp.training.trainer - Epoch 81/99 +2025-03-24 15:32:35,557 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:32:35,928 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:32:35,929 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:32:35,946 - INFO - allennlp.training.trainer - Training +2025-03-24 15:33:50,127 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:33:58,842 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:33:58,842 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:33:58,843 - INFO - allennlp.training.tensorboard_writer - loss | 0.018 | 1.383 +2025-03-24 15:33:58,844 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:33:58,844 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:33:58,845 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:33:58,845 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.724 +2025-03-24 15:33:58,846 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:33:58,846 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:33:58,847 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.116 +2025-03-24 15:33:58,847 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.644 +2025-03-24 15:33:58,847 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:33:58,848 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:34:22,366 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.809914 +2025-03-24 15:34:22,367 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:29:06 +2025-03-24 15:34:22,367 - INFO - allennlp.training.trainer - Epoch 82/99 +2025-03-24 15:34:22,367 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:34:22,734 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:34:22,735 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:34:22,749 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2', 'g_minus', 'V_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]] +2025-03-24 15:35:36,978 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:35:45,771 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:35:45,772 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:35:45,772 - INFO - allennlp.training.tensorboard_writer - loss | 0.019 | 1.380 +2025-03-24 15:35:45,773 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:35:45,773 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:35:45,774 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:35:45,774 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:35:45,775 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:35:45,775 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:35:45,775 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.124 +2025-03-24 15:35:45,776 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.650 +2025-03-24 15:35:45,776 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:35:45,777 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:36:09,614 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.246433 +2025-03-24 15:36:09,614 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:27:31 +2025-03-24 15:36:09,614 - INFO - allennlp.training.trainer - Epoch 83/99 +2025-03-24 15:36:09,615 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:36:09,994 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:36:09,995 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:36:09,996 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:36:10,009 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_4', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_cos', 'V_0', 'g_mul', 'C_5', 'N_0']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]] +selected_programs [[['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_cos', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_add', 'N_0', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_minus', 'N_1', 'V_1']], [['g_equal', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'N_0', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_equal', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']]] +2025-03-24 15:37:28,705 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:37:38,898 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:37:38,898 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:37:38,898 - INFO - allennlp.training.tensorboard_writer - loss | 0.018 | 1.388 +2025-03-24 15:37:38,899 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:37:38,900 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:37:38,900 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:37:38,901 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.733 +2025-03-24 15:37:38,901 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:37:38,901 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:37:38,902 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.113 +2025-03-24 15:37:38,902 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.651 +2025-03-24 15:37:38,903 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:37:38,903 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:38:03,025 - INFO - allennlp.training.trainer - Epoch duration: 0:01:53.410961 +2025-03-24 15:38:03,026 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:25:57 +2025-03-24 15:38:03,026 - INFO - allennlp.training.trainer - Epoch 84/99 +2025-03-24 15:38:03,027 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:38:03,466 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:38:03,467 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:38:03,491 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_minus', 'V_2', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'V_0', 'V_2']], [['gougu_add', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_half', 'V_0', 'g_minus', 'V_2', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2', 'gougu_add', 'V_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2', 'gougu_minus', 'N_0', 'V_0']]] +2025-03-24 15:39:18,135 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:39:27,917 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:39:27,918 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:39:27,919 - INFO - allennlp.training.tensorboard_writer - loss | 0.018 | 1.389 +2025-03-24 15:39:27,920 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:39:27,920 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.726 +2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:39:27,921 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.112 +2025-03-24 15:39:27,922 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.640 +2025-03-24 15:39:27,923 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:39:27,923 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:39:52,351 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.324595 +2025-03-24 15:39:52,352 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:24:22 +2025-03-24 15:39:52,353 - INFO - allennlp.training.trainer - Epoch 85/99 +2025-03-24 15:39:52,353 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:39:52,757 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:39:52,758 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:39:52,758 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:39:52,758 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:39:52,759 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:39:52,759 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:39:52,759 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:39:52,772 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'C_2', 'g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'N_0', 'C_2', 'g_double', 'V_0', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_tan', 'N_0', 'g_mul', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_double', 'V_0', 'g_minus', 'C_3', 'C_2', 'g_minus', 'V_1', 'V_2']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_sin', 'V_1', 'g_divide', 'N_1', 'V_2']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_equal', 'C_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_1', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_half', 'C_2', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_2', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_equal', 'N_1']], [['g_equal', 'N_0']]] +2025-03-24 15:41:10,050 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:41:20,308 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:41:20,309 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:41:20,310 - INFO - allennlp.training.tensorboard_writer - loss | 0.019 | 1.393 +2025-03-24 15:41:20,311 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:41:20,312 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:41:20,312 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:41:20,312 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.726 +2025-03-24 15:41:20,313 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:41:20,313 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:41:20,313 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.111 +2025-03-24 15:41:20,314 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.640 +2025-03-24 15:41:20,314 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:41:20,314 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:41:44,757 - INFO - allennlp.training.trainer - Epoch duration: 0:01:52.404633 +2025-03-24 15:41:44,758 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:22:47 +2025-03-24 15:41:44,758 - INFO - allennlp.training.trainer - Epoch 86/99 +2025-03-24 15:41:44,759 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:41:45,127 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:41:45,128 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:41:45,128 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:41:45,129 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:41:45,143 - INFO - allennlp.training.trainer - Training +2025-03-24 15:43:06,361 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:43:16,572 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:43:16,572 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:43:16,573 - INFO - allennlp.training.tensorboard_writer - loss | 0.018 | 1.396 +2025-03-24 15:43:16,574 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:43:16,575 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:43:16,575 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:43:16,575 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:43:16,576 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:43:16,576 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:43:16,576 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.115 +2025-03-24 15:43:16,577 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.644 +2025-03-24 15:43:16,578 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:43:16,578 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:43:40,417 - INFO - allennlp.training.trainer - Epoch duration: 0:01:55.658674 +2025-03-24 15:43:40,418 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:21:12 +2025-03-24 15:43:40,418 - INFO - allennlp.training.trainer - Epoch 87/99 +2025-03-24 15:43:40,418 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:43:40,775 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:43:40,776 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:43:40,776 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:43:40,777 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:43:40,790 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_circle_area', 'N_0']], [['g_double', 'N_1', 'g_sin', 'V_0', 'g_mul', 'N_0', 'V_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'g_double', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1']], [['g_bili', 'C_5', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'V_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'cal_cone', 'V_0', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'cal_circle_area', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_half', 'N_0', 'cal_cone', 'V_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_add', 'N_0', 'N_1', 'g_bili', 'N_0', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'g_add', 'V_0', 'N_2', 'g_add', 'V_1', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_2', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']]] +selected_programs [[['g_bili', 'N_1', 'N_0', 'N_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_equal', 'N_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_divide', 'V_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_divide', 'N_1', 'N_0', 'g_mul', 'V_0', 'V_0', 'g_mul', 'V_1', 'N_2']], [['g_minus', 'N_2', 'N_2', 'g_add', 'V_0', 'N_1', 'g_add', 'V_1', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']], [['g_half', 'N_0']], [['g_add', 'N_1', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1']], [['g_mul', 'N_0', 'N_1', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_1', 'g_minus', 'V_1', 'N_2', 'g_half', 'V_2']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_equal', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_1', 'V_0']]] +2025-03-24 15:44:54,799 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:45:03,521 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:45:03,522 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:45:03,523 - INFO - allennlp.training.tensorboard_writer - loss | 0.017 | 1.397 +2025-03-24 15:45:03,523 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:45:03,523 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:45:03,525 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:45:03,525 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:45:03,526 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:45:03,526 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:45:03,526 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.112 +2025-03-24 15:45:03,527 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.647 +2025-03-24 15:45:03,527 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:45:03,528 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:45:27,848 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.430111 +2025-03-24 15:45:27,849 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:19:35 +2025-03-24 15:45:27,849 - INFO - allennlp.training.trainer - Epoch 88/99 +2025-03-24 15:45:27,849 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:45:28,202 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:45:28,203 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:45:28,216 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_equal', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_minus', 'C_4', 'V_1', 'g_half', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_add', 'N_0', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_double', 'V_1']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_3', 'g_minus', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_0']], [['g_double', 'N_0']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0', 'g_cos', 'V_0', 'g_mul', 'V_0', 'V_1']], [['g_sin', 'C_1', 'g_mul', 'V_0', 'N_0', 'g_double', 'V_1']], [['g_equal', 'N_1']], [['g_equal', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1', 'g_minus', 'C_3', 'V_2']]] +selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_half', 'C_3', 'g_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_cos', 'N_0', 'g_divide', 'N_1', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2', 'gougu_add', 'V_1', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_0', 'N_2', 'N_1', 'gougu_add', 'V_0', 'N_2']]] +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_2', 'N_3', 'g_tan', 'N_0', 'g_mul', 'V_0', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'V_0', 'N_1', 'g_bili', 'V_1', 'N_2', 'N_1', 'gougu_minus', 'V_2', 'N_3']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]] +2025-03-24 15:46:43,035 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:46:51,809 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:46:51,810 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:46:51,811 - INFO - allennlp.training.tensorboard_writer - loss | 0.017 | 1.394 +2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:46:51,812 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.725 +2025-03-24 15:46:51,813 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:46:51,814 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:46:51,814 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.116 +2025-03-24 15:46:51,814 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.644 +2025-03-24 15:46:51,815 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:46:51,815 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:47:15,616 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.766767 +2025-03-24 15:47:15,617 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:17:59 +2025-03-24 15:47:15,617 - INFO - allennlp.training.trainer - Epoch 89/99 +2025-03-24 15:47:15,617 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:47:15,951 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:47:15,952 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:47:15,952 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:47:15,952 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:47:15,953 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:47:15,953 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:47:15,953 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:47:15,966 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0', 'g_half', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_mul', 'N_2', 'N_5']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_add', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0']], [['g_half', 'N_0', 'g_half', 'C_3', 'g_add', 'V_0', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_minus', 'N_1', 'V_1']], [['gougu_minus', 'N_0', 'N_1', 'g_double', 'V_0']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_2', 'V_1', 'g_minus', 'V_0', 'V_2']], [['gougu_add', 'N_0', 'N_1']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_half', 'N_0', 'gougu_add', 'V_0', 'N_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_2', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_half', 'V_1']], [['g_minus', 'C_4', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['gougu_minus', 'N_1', 'N_0', 'g_minus', 'V_0', 'N_2', 'gougu_add', 'V_1', 'V_0']], [['g_bili', 'N_2', 'N_0', 'N_1', 'gougu_add', 'V_0', 'N_2']], [['g_bili', 'N_1', 'N_0', 'N_2', 'gougu_add', 'V_0', 'N_2']]] +2025-03-24 15:48:31,214 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:48:39,959 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:48:39,960 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:48:39,960 - INFO - allennlp.training.tensorboard_writer - loss | 0.017 | 1.401 +2025-03-24 15:48:39,960 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:48:39,961 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:48:39,961 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:48:39,961 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.723 +2025-03-24 15:48:39,963 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:48:39,963 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:48:39,963 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 15:48:39,964 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.635 +2025-03-24 15:48:39,964 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:48:39,965 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:49:03,948 - INFO - allennlp.training.trainer - Epoch duration: 0:01:48.330674 +2025-03-24 15:49:03,948 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:16:22 +2025-03-24 15:49:03,949 - INFO - allennlp.training.trainer - Epoch 90/99 +2025-03-24 15:49:03,949 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:49:04,318 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:49:04,319 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:49:04,333 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'C_2', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +selected_programs [[['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'gougu_minus', 'N_1', 'V_1']], [['g_double', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_half', 'N_0', 'g_minus', 'N_0', 'V_0', 'g_add', 'V_0', 'V_1']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'V_0', 'N_1', 'g_minus', 'V_1', 'N_1', 'g_half', 'V_2']], [['g_equal', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['gougu_minus', 'N_1', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']]] +2025-03-24 15:50:18,618 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:50:27,374 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:50:27,375 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:50:27,376 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.402 +2025-03-24 15:50:27,376 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:50:27,376 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:50:27,377 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:50:27,377 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:50:27,378 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:50:27,378 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:50:27,379 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 15:50:27,379 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.642 +2025-03-24 15:50:27,379 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:50:27,380 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:50:51,348 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.399855 +2025-03-24 15:50:51,349 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:14:44 +2025-03-24 15:50:51,349 - INFO - allennlp.training.trainer - Epoch 91/99 +2025-03-24 15:50:51,350 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:50:51,713 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:50:51,714 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:50:51,714 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:50:51,714 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:50:51,715 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:50:51,715 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:50:51,715 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:50:51,729 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_minus', 'V_1', 'V_0', 'g_minus', 'V_1', 'V_2']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]] +2025-03-24 15:52:05,803 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:52:14,552 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:52:14,552 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:52:14,552 - INFO - allennlp.training.tensorboard_writer - loss | 0.016 | 1.409 +2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:52:14,553 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.726 +2025-03-24 15:52:14,555 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:52:14,555 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:52:14,555 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 15:52:14,556 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.638 +2025-03-24 15:52:14,556 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:52:14,556 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:52:37,802 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.452376 +2025-03-24 15:52:37,802 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:13:07 +2025-03-24 15:52:37,803 - INFO - allennlp.training.trainer - Epoch 92/99 +2025-03-24 15:52:37,803 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:52:38,158 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:52:38,159 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:52:38,159 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:52:38,160 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:52:38,174 - INFO - allennlp.training.trainer - Training +2025-03-24 15:53:52,268 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:54:01,017 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:54:01,018 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:54:01,018 - INFO - allennlp.training.tensorboard_writer - loss | 0.016 | 1.410 +2025-03-24 15:54:01,019 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:54:01,019 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:54:01,020 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:54:01,020 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.729 +2025-03-24 15:54:01,021 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:54:01,021 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:54:01,021 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.126 +2025-03-24 15:54:01,022 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.633 +2025-03-24 15:54:01,022 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:54:01,023 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:54:27,389 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.585760 +2025-03-24 15:54:27,391 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:11:29 +2025-03-24 15:54:27,391 - INFO - allennlp.training.trainer - Epoch 93/99 +2025-03-24 15:54:27,392 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:54:27,742 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:54:27,743 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:54:27,744 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:54:27,764 - INFO - allennlp.training.trainer - Training +2025-03-24 15:55:42,835 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:55:51,645 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:55:51,645 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:55:51,646 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.411 +2025-03-24 15:55:51,646 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:55:51,646 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:55:51,648 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:55:51,648 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.729 +2025-03-24 15:55:51,649 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:55:51,649 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:55:51,649 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.122 +2025-03-24 15:55:51,650 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.637 +2025-03-24 15:55:51,650 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:55:51,651 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:56:16,470 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.078993 +2025-03-24 15:56:16,471 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:09:51 +2025-03-24 15:56:16,471 - INFO - allennlp.training.trainer - Epoch 94/99 +2025-03-24 15:56:16,471 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:56:16,858 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:56:16,859 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:56:16,859 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:56:16,860 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:56:16,876 - INFO - allennlp.training.trainer - Training +2025-03-24 15:57:32,201 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:57:41,037 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:57:41,038 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:57:41,039 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.411 +2025-03-24 15:57:41,039 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:57:41,040 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.728 +2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:57:41,041 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:57:41,042 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 15:57:41,043 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.637 +2025-03-24 15:57:41,043 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:57:41,043 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:58:05,868 - INFO - allennlp.training.trainer - Epoch duration: 0:01:49.396995 +2025-03-24 15:58:05,869 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:08:13 +2025-03-24 15:58:05,869 - INFO - allennlp.training.trainer - Epoch 95/99 +2025-03-24 15:58:05,869 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:58:06,283 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:58:06,284 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:58:06,284 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:58:06,285 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:58:06,300 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_double', 'N_0']], [['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_1', 'g_half', 'N_0', 'g_minus', 'V_1', 'V_0', 'g_minus', 'V_1', 'V_2']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_0', 'g_add', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1', 'g_minus', 'C_3', 'V_1', 'g_half', 'V_2']], [['g_minus', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_3', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1', 'g_minus', 'V_0', 'V_2']], [['g_minus', 'C_3', 'N_0']]] +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_3', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_3', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_0', 'V_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_mul', 'V_1', 'V_1', 'g_divide', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_tan', 'N_1', 'g_minus', 'V_0', 'V_1', 'g_mul', 'V_2', 'N_2']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_bili', 'N_1', 'N_0', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]] +2025-03-24 15:59:21,191 - INFO - allennlp.training.trainer - Validating +2025-03-24 15:59:30,059 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 15:59:30,059 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 15:59:30,060 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.412 +2025-03-24 15:59:30,061 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 15:59:30,062 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 15:59:30,063 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 15:59:30,063 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 15:59:30,063 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 15:59:30,064 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 15:59:30,064 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.122 +2025-03-24 15:59:30,065 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.639 +2025-03-24 15:59:30,065 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 15:59:30,066 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 15:59:53,072 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.203361 +2025-03-24 15:59:53,073 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:06:35 +2025-03-24 15:59:53,073 - INFO - allennlp.training.trainer - Epoch 96/99 +2025-03-24 15:59:53,074 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 15:59:53,499 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 15:59:53,500 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 15:59:53,500 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 15:59:53,500 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 15:59:53,501 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 15:59:53,501 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 15:59:53,501 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 15:59:53,523 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_sin', 'N_1', 'g_mul', 'N_0', 'V_0']], [['g_sin', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_sin', 'N_2', 'g_divide', 'V_1', 'V_2']], [['g_minus', 'N_4', 'N_3', 'g_minus', 'N_2', 'N_0', 'g_mul', 'V_1', 'N_1']], [['g_minus', 'N_2', 'N_3', 'g_tan', 'N_0', 'g_mul', 'V_0', 'V_1']], [['g_sin', 'N_2', 'g_mul', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_divide', 'V_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'N_2', 'N_0', 'V_0']], [['g_divide', 'N_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_add', 'N_1', 'N_1', 'g_bili', 'N_1', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'N_2']], [['g_add', 'N_0', 'N_1', 'g_bili', 'V_0', 'N_0', 'N_2']], [['gougu_add', 'N_1', 'N_2', 'g_minus', 'N_1', 'V_0', 'g_bili', 'V_1', 'N_2', 'N_1', 'gougu_minus', 'V_2', 'N_3']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_mul', 'V_0', 'V_0', 'g_mul', 'N_2', 'V_1', 'g_minus', 'V_2', 'N_2']], [['g_bili', 'N_2', 'N_0', 'N_1']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0']], [['gougu_add', 'N_0', 'N_0', 'gougu_add', 'N_0', 'V_0', 'gougu_add', 'N_1', 'V_0', 'gougu_add', 'V_2', 'V_1']], [['g_divide', 'N_0', 'N_2']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_0', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0']], [['g_double', 'N_1', 'g_add', 'N_0', 'V_0']], [['g_half', 'N_0', 'g_sin', 'V_0', 'g_mul', 'N_1', 'V_1']], [['g_tan', 'N_0', 'g_mul', 'N_2', 'V_0', 'g_sin', 'V_1', 'g_mul', 'V_2', 'N_1']], [['g_bili', 'N_0', 'N_1', 'N_2', 'g_minus', 'N_2', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_minus', 'N_0', 'N_1', 'g_divide', 'V_1', 'V_0', 'g_minus', 'N_0', 'V_2']], [['g_mul', 'N_0', 'N_2', 'g_divide', 'V_0', 'N_1']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_half', 'N_0', 'g_mul', 'V_0', 'V_0']], [['g_divide', 'N_0', 'N_1', 'g_divide', 'N_2', 'V_0', 'g_add', 'V_1', 'N_2']], [['g_double', 'N_0']], [['g_add', 'N_0', 'N_1', 'g_divide', 'N_0', 'V_0', 'g_divide', 'N_2', 'V_1']]] +2025-03-24 16:01:11,210 - INFO - allennlp.training.trainer - Validating +2025-03-24 16:01:20,012 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 16:01:20,012 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 16:01:20,013 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.416 +2025-03-24 16:01:20,013 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 16:01:20,014 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.730 +2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 16:01:20,015 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 16:01:20,016 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.117 +2025-03-24 16:01:20,016 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.640 +2025-03-24 16:01:20,017 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 16:01:20,017 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 16:01:43,584 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.510637 +2025-03-24 16:01:43,585 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:04:56 +2025-03-24 16:01:43,585 - INFO - allennlp.training.trainer - Epoch 97/99 +2025-03-24 16:01:43,585 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 16:01:43,934 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 16:01:43,935 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 16:01:43,936 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 16:01:43,950 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_minus', 'C_2', 'N_1', 'g_minus', 'C_3', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_mul', 'N_0', 'N_0', 'g_divide', 'C_2', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_circle_area', 'N_0', 'g_divide', 'C_3', 'C_4', 'g_mul', 'V_0', 'V_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['cal_circle_area', 'N_0']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0', 'g_double', 'V_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1', 'g_double', 'V_0']], [['g_bili', 'N_0', 'C_4', 'N_1']], [['g_bili', 'C_5', 'C_3', 'N_1', 'g_mul', 'V_0', 'N_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'gougu_minus', 'N_1', 'V_0']], [['cal_cone', 'N_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'V_0', 'N_1']], [['g_bili', 'N_0', 'C_4', 'N_1', 'cal_cone', 'V_0', 'N_0']], [['gougu_minus', 'N_0', 'N_1', 'cal_circle_area', 'V_0']], [['gougu_minus', 'N_1', 'N_2', 'cal_cone', 'N_1', 'V_0']], [['g_bili', 'N_0', 'C_4', 'C_2']], [['cal_cone', 'N_0', 'N_1']], [['g_half', 'N_0', 'cal_cone', 'V_0', 'N_1']], [['cal_cone', 'N_0', 'N_1']], [['g_bili', 'N_0', 'N_1', 'C_4', 'cal_cone', 'V_0', 'N_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'V_0', 'C_4']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_divide', 'N_2', 'N_3', 'g_divide', 'N_4', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_bili', 'V_0', 'N_0', 'N_2']]] +selected_programs [[['g_half', 'N_0', 'g_mul', 'V_0', 'N_1', 'g_mul', 'V_1', 'V_0']], [['g_double', 'N_0', 'g_double', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_half', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_mul', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0', 'g_minus', 'V_1', 'N_0']], [['g_add', 'N_0', 'C_2', 'g_half', 'V_0', 'g_minus', 'C_3', 'C_2', 'g_minus', 'V_2', 'V_1']], [['g_half', 'N_0', 'gougu_minus', 'N_1', 'V_0', 'g_double', 'V_1']], [['g_half', 'N_0', 'g_half', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_double', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'g_half', 'V_0', 'g_add', 'V_2', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_half', 'N_1', 'g_add', 'N_0', 'V_0', 'g_add', 'V_1', 'V_0']], [['g_half', 'N_0']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_double', 'N_1', 'g_minus', 'N_0', 'V_0']], [['g_add', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0']], [['g_equal', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_double', 'N_0', 'g_add', 'V_1', 'V_0']], [['g_double', 'N_2', 'g_add', 'N_0', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0', 'g_minus', 'N_1', 'V_0']], [['g_minus', 'N_1', 'N_0', 'g_half', 'V_0', 'g_add', 'V_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'V_0', 'N_2', 'gougu_minus', 'V_1', 'V_0', 'g_divide', 'V_2', 'N_2']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']]] +2025-03-24 16:03:04,087 - INFO - allennlp.training.trainer - Validating +2025-03-24 16:03:14,310 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 16:03:14,311 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 16:03:14,312 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.422 +2025-03-24 16:03:14,313 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 16:03:14,313 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 16:03:14,314 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 16:03:14,314 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.730 +2025-03-24 16:03:14,314 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 16:03:14,315 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 16:03:14,315 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.116 +2025-03-24 16:03:14,316 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.640 +2025-03-24 16:03:14,316 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 16:03:14,317 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 16:03:33,659 - INFO - allennlp.training.trainer - Epoch duration: 0:01:50.074599 +2025-03-24 16:03:33,660 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:03:18 +2025-03-24 16:03:33,661 - INFO - allennlp.training.trainer - Epoch 98/99 +2025-03-24 16:03:33,661 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 16:03:34,011 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 16:03:34,012 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 16:03:34,012 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 16:03:34,013 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 16:03:34,027 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'C_2', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 16:04:49,386 - INFO - allennlp.training.trainer - Validating +2025-03-24 16:04:58,283 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 16:04:58,284 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 16:04:58,284 - INFO - allennlp.training.tensorboard_writer - loss | 0.015 | 1.418 +2025-03-24 16:04:58,285 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 16:04:58,285 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 16:04:58,286 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 16:04:58,287 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.729 +2025-03-24 16:04:58,287 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 16:04:58,287 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 16:04:58,288 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.119 +2025-03-24 16:04:58,288 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.638 +2025-03-24 16:04:58,289 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 16:04:58,289 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 16:05:21,126 - INFO - allennlp.training.trainer - Epoch duration: 0:01:47.464991 +2025-03-24 16:05:21,126 - INFO - allennlp.training.trainer - Estimated training time remaining: 0:01:39 +2025-03-24 16:05:21,127 - INFO - allennlp.training.trainer - Epoch 99/99 +2025-03-24 16:05:21,127 - INFO - allennlp.training.trainer - Peak CPU memory usage MB: 4772.668 +2025-03-24 16:05:21,499 - INFO - allennlp.training.trainer - GPU 0 memory usage MB: 5 +2025-03-24 16:05:21,500 - INFO - allennlp.training.trainer - GPU 1 memory usage MB: 5 +2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 2 memory usage MB: 5 +2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 3 memory usage MB: 5 +2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 4 memory usage MB: 17662 +2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 5 memory usage MB: 5 +2025-03-24 16:05:21,501 - INFO - allennlp.training.trainer - GPU 6 memory usage MB: 5 +2025-03-24 16:05:21,514 - INFO - allennlp.training.trainer - Training +selected_programs [[['g_double', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_mul', 'N_1', 'N_2', 'g_double', 'V_0']], [['g_half', 'N_1']], [['g_double', 'N_0', 'g_minus', 'C_3', 'C_2', 'g_half', 'V_1', 'g_minus', 'V_2', 'V_0']], [['g_double', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_half', 'V_1']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0', 'g_sin', 'V_0', 'g_mul', 'V_1', 'N_1', 'g_double', 'V_2']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_double', 'N_0']], [['g_half', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_2', 'N_1', 'g_minus', 'V_0', 'N_0']], [['g_half', 'N_0', 'g_equal', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_double', 'V_0']], [['g_double', 'N_0', 'g_double', 'N_1', 'g_add', 'V_0', 'V_1']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_3', 'V_0', 'g_minus', 'C_3', 'V_1']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']], [['g_half', 'N_0', 'g_half', 'N_1', 'gougu_minus', 'V_0', 'V_1']], [['g_equal', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_double', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_2', 'N_0']], [['g_half', 'N_0']]] +2025-03-24 16:06:37,015 - INFO - allennlp.training.trainer - Validating +2025-03-24 16:06:45,843 - INFO - allennlp.training.tensorboard_writer - Training | Validation +2025-03-24 16:06:45,843 - INFO - allennlp.training.tensorboard_writer - gpu_5_memory_MB | 5.000 | N/A +2025-03-24 16:06:45,843 - INFO - allennlp.training.tensorboard_writer - loss | 0.014 | 1.417 +2025-03-24 16:06:45,844 - INFO - allennlp.training.tensorboard_writer - gpu_1_memory_MB | 5.000 | N/A +2025-03-24 16:06:45,845 - INFO - allennlp.training.tensorboard_writer - gpu_4_memory_MB | 17662.000 | N/A +2025-03-24 16:06:45,845 - INFO - allennlp.training.tensorboard_writer - gpu_2_memory_MB | 5.000 | N/A +2025-03-24 16:06:45,845 - INFO - allennlp.training.tensorboard_writer - BLEU | N/A | 0.727 +2025-03-24 16:06:45,846 - INFO - allennlp.training.tensorboard_writer - cpu_memory_MB | 4772.668 | N/A +2025-03-24 16:06:45,846 - INFO - allennlp.training.tensorboard_writer - gpu_0_memory_MB | 5.000 | N/A +2025-03-24 16:06:45,847 - INFO - allennlp.training.tensorboard_writer - no_result | 0.000 | 0.118 +2025-03-24 16:06:45,847 - INFO - allennlp.training.tensorboard_writer - acc | 0.000 | 0.642 +2025-03-24 16:06:45,848 - INFO - allennlp.training.tensorboard_writer - gpu_6_memory_MB | 5.000 | N/A +2025-03-24 16:06:45,848 - INFO - allennlp.training.tensorboard_writer - gpu_3_memory_MB | 5.000 | N/A +2025-03-24 16:07:07,315 - INFO - allennlp.training.trainer - Epoch duration: 0:01:46.187999 +2025-03-24 16:07:07,357 - INFO - allennlp.training.checkpointer - loading best weights +2025-03-24 16:07:08,001 - INFO - allennlp.commands.train - The model will be evaluated using the best epoch weights. +2025-03-24 16:07:08,004 - INFO - allennlp.training.util - Iterating over dataset +selected_programs [[['g_minus', 'C_3', 'N_0']], [['g_minus', 'N_0', 'N_1', 'g_minus', 'V_0', 'N_0', 'g_half', 'V_1']], [['g_minus', 'N_0', 'N_1', 'g_half', 'V_0']], [['g_half', 'N_0']], [['g_equal', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_half', 'N_0', 'g_minus', 'C_2', 'V_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_minus', 'C_3', 'N_1', 'g_minus', 'C_3', 'N_0', 'g_minus', 'V_1', 'V_0']], [['g_minus', 'N_1', 'N_0']], [['g_minus', 'C_3', 'N_0']], [['g_add', 'N_0', 'N_1']], [['g_double', 'N_0']], [['g_minus', 'C_2', 'N_0']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_double', 'N_0', 'g_minus', 'V_0', 'N_1']], [['g_add', 'N_1', 'N_2', 'g_bili', 'N_0', 'N_1', 'V_0']], [['g_minus', 'N_0', 'N_1']], [['g_minus', 'N_1', 'N_0']], [['g_half', 'N_1', 'gougu_minus', 'N_0', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_minus', 'C_3', 'C_2', 'g_minus', 'V_0', 'N_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_half', 'V_0']], [['g_minus', 'N_0', 'C_1', 'g_sin', 'V_0', 'g_minus', 'N_0', 'V_1']], [['g_add', 'C_2', 'N_0', 'g_minus', 'C_2', 'V_0']], [['g_minus', 'C_3', 'N_0', 'g_minus', 'C_4', 'V_0', 'g_minus', 'V_1', 'N_1', 'g_minus', 'V_2', 'N_1']], [['g_minus', 'C_2', 'N_0', 'g_minus', 'C_2', 'N_2', 'g_minus', 'V_1', 'V_0']], [['g_half', 'N_0', 'gougu_minus', 'V_0', 'N_1', 'g_double', 'V_1']], [['gougu_minus', 'N_1', 'N_0', 'g_add', 'V_0', 'N_0']]] diff --git a/vocabulary/non_padded_namespaces.txt b/vocabulary/non_padded_namespaces.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5dee50c4400824e195a609940a9f9c9abad69b5 --- /dev/null +++ b/vocabulary/non_padded_namespaces.txt @@ -0,0 +1,2 @@ +*tags +*labels diff --git a/vocabulary/tokens.txt b/vocabulary/tokens.txt new file mode 100644 index 0000000000000000000000000000000000000000..87dd25151a4bbf27df5c854daba4c88a934df1f3 --- /dev/null +++ b/vocabulary/tokens.txt @@ -0,0 +1,43 @@ +@@UNKNOWN@@ +N_0 +@start@ +@end@ +V_0 +g_minus +N_1 +g_half +V_1 +C_3 +g_double +N_2 +g_add +C_2 +g_divide +g_bili +g_mul +V_2 +gougu_minus +gougu_add +g_equal +C_4 +g_sin +N_3 +g_tan +cal_circle_area +cal_cone +N_4 +g_cos +C_1 +N_5 +C_5 +C_0 +N_6 +N_7 +cal_circle_perimeter +N_8 +N_9 +g_asin +N_11 +g_acos +N_10 +C_6