diff --git a/outputs/args_la.json b/outputs/args_la.json new file mode 100644 index 0000000000000000000000000000000000000000..dcf95e1f0b0c9a63479e9fbd2beaeb698387543c --- /dev/null +++ b/outputs/args_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec833e18809abcd83d6a58fc8f1515a28191bebcbc44bd7610cc314e5b24a53 +size 1109 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..0a304d04ca0445bc55e7d86154a9cbed9649e031 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,852 @@ +06/01/2024 11:55:49 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:55:49 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:55:50 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:55:51 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 11:55:51 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'. +06/01/2024 11:55:51 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 11:55:51 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 11:55:51 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:55:51 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 11:55:51 - INFO - __main__ - Adapter Name = cola +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight +06/01/2024 11:55:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias +06/01/2024 11:55:51 - INFO - __main__ - heads.cola.1.weight +06/01/2024 11:55:51 - INFO - __main__ - heads.cola.1.bias +06/01/2024 11:55:51 - INFO - __main__ - heads.cola.4.weight +06/01/2024 11:55:51 - INFO - __main__ - heads.cola.4.bias +06/01/2024 11:55:54 - INFO - __main__ - Sample 3397 of the training set: {'input_ids': [101, 9901, 13303, 2044, 1996, 12383, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:55:54 - INFO - __main__ - Sample 2366 of the training set: {'input_ids': [101, 1996, 3586, 3631, 10647, 1005, 1055, 12277, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:55:54 - INFO - __main__ - Sample 2356 of the training set: {'input_ids': [101, 2057, 19379, 26860, 2094, 4981, 2083, 1996, 4624, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:00:43 - INFO - __main__ - f_mu shape : torch.Size([1043, 2]) +06/01/2024 12:00:43 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2]) +06/01/2024 12:00:43 - INFO - __main__ - tensor([[ 0.0578, -0.1273], + [ 0.0141, -0.0255], + [ 0.0414, -0.0381], + ..., + [-0.0166, -0.0405], + [ 0.0134, -0.0715], + [-0.0382, 0.0213]], device='cuda:0') +06/01/2024 12:00:43 - INFO - __main__ - tensor([[[12.9645, 12.7884], + [12.7884, 12.9717]], + + [[11.9546, 11.6422], + [11.6422, 12.0088]], + + [[12.1316, 11.8974], + [11.8974, 12.1370]], + + ..., + + [[13.6458, 13.3185], + [13.3185, 13.6473]], + + [[11.7474, 11.5666], + [11.5666, 11.7562]], + + [[12.1346, 11.8893], + [11.8893, 12.1221]]], device='cuda:0') +06/01/2024 12:00:43 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:00:48 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:00:48 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:00:49 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:00:50 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/adapter_config.json +06/01/2024 12:00:50 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'. +06/01/2024 12:00:50 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_adapter.bin +06/01/2024 12:00:50 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/head_config.json +06/01/2024 12:00:50 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:00:50 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_model_head.bin +06/01/2024 12:00:50 - INFO - __main__ - Adapter Name = cola +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight +06/01/2024 12:00:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias +06/01/2024 12:00:50 - INFO - __main__ - heads.cola.1.weight +06/01/2024 12:00:50 - INFO - __main__ - heads.cola.1.bias +06/01/2024 12:00:50 - INFO - __main__ - heads.cola.4.weight +06/01/2024 12:00:50 - INFO - __main__ - heads.cola.4.bias +06/01/2024 12:00:54 - INFO - __main__ - Sample 3397 of the training set: {'input_ids': [101, 9901, 13303, 2044, 1996, 12383, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:00:54 - INFO - __main__ - Sample 2366 of the training set: {'input_ids': [101, 1996, 3586, 3631, 10647, 1005, 1055, 12277, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:00:54 - INFO - __main__ - Sample 2356 of the training set: {'input_ids': [101, 2057, 19379, 26860, 2094, 4981, 2083, 1996, 4624, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:05:44 - INFO - __main__ - f_mu shape : torch.Size([1043, 2]) +06/01/2024 12:05:44 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2]) +06/01/2024 12:05:44 - INFO - __main__ - tensor([[-1.5606e+00, 1.6427e+00], + [-1.0038e+00, 1.0276e+00], + [-1.1142e+00, 1.1758e+00], + ..., + [-2.2176e+00, 2.2876e+00], + [ 1.7365e-03, -1.2060e-01], + [ 4.5788e-02, -1.5021e-01]], device='cuda:0') +06/01/2024 12:05:44 - INFO - __main__ - tensor([[[4.1708, 1.7396], + [1.7396, 4.4299]], + + [[2.7875, 1.4555], + [1.4555, 2.9777]], + + [[3.2805, 1.2788], + [1.2788, 3.6008]], + + ..., + + [[4.2817, 3.0575], + [3.0575, 4.4446]], + + [[2.9753, 0.4526], + [0.4526, 3.1586]], + + [[2.8956, 0.2997], + [0.2997, 3.0948]]], device='cuda:0') +06/01/2024 12:05:44 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:05:49 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:05:49 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:05:50 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:05:51 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/adapter_config.json +06/01/2024 12:05:51 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'. +06/01/2024 12:05:51 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_adapter.bin +06/01/2024 12:05:51 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/head_config.json +06/01/2024 12:05:51 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:05:51 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_model_head.bin +06/01/2024 12:05:51 - INFO - __main__ - Adapter Name = cola +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight +06/01/2024 12:05:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias +06/01/2024 12:05:51 - INFO - __main__ - heads.cola.1.weight +06/01/2024 12:05:51 - INFO - __main__ - heads.cola.1.bias +06/01/2024 12:05:51 - INFO - __main__ - heads.cola.4.weight +06/01/2024 12:05:51 - INFO - __main__ - heads.cola.4.bias +06/01/2024 12:05:54 - INFO - __main__ - Sample 3397 of the training set: {'input_ids': [101, 9901, 13303, 2044, 1996, 12383, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:05:54 - INFO - __main__ - Sample 2366 of the training set: {'input_ids': [101, 1996, 3586, 3631, 10647, 1005, 1055, 12277, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:05:54 - INFO - __main__ - Sample 2356 of the training set: {'input_ids': [101, 2057, 19379, 26860, 2094, 4981, 2083, 1996, 4624, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:10:44 - INFO - __main__ - f_mu shape : torch.Size([1043, 2]) +06/01/2024 12:10:44 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2]) +06/01/2024 12:10:44 - INFO - __main__ - tensor([[-2.2763, 2.3324], + [-1.8188, 1.8609], + [-1.6634, 1.6638], + ..., + [-3.0412, 3.1232], + [-0.9870, 0.8577], + [-0.5357, 0.3929]], device='cuda:0') +06/01/2024 12:10:44 - INFO - __main__ - tensor([[[ 4.2765, 2.1601], + [ 2.1601, 4.4863]], + + [[ 3.1863, 1.3376], + [ 1.3376, 3.4189]], + + [[ 3.6683, 0.5845], + [ 0.5845, 4.1141]], + + ..., + + [[ 4.6443, 3.8401], + [ 3.8401, 4.7094]], + + [[ 5.9511, -2.9900], + [-2.9900, 6.5578]], + + [[ 5.5230, -3.1333], + [-3.1333, 6.1906]]], device='cuda:0') +06/01/2024 12:10:44 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:10:49 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:10:49 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:10:50 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:10:51 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/adapter_config.json +06/01/2024 12:10:51 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'. +06/01/2024 12:10:51 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_adapter.bin +06/01/2024 12:10:51 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/head_config.json +06/01/2024 12:10:51 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:10:51 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_model_head.bin +06/01/2024 12:10:51 - INFO - __main__ - Adapter Name = cola +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight +06/01/2024 12:10:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias +06/01/2024 12:10:51 - INFO - __main__ - heads.cola.1.weight +06/01/2024 12:10:51 - INFO - __main__ - heads.cola.1.bias +06/01/2024 12:10:51 - INFO - __main__ - heads.cola.4.weight +06/01/2024 12:10:51 - INFO - __main__ - heads.cola.4.bias +06/01/2024 12:10:54 - INFO - __main__ - Sample 3397 of the training set: {'input_ids': [101, 9901, 13303, 2044, 1996, 12383, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:10:54 - INFO - __main__ - Sample 2366 of the training set: {'input_ids': [101, 1996, 3586, 3631, 10647, 1005, 1055, 12277, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:10:54 - INFO - __main__ - Sample 2356 of the training set: {'input_ids': [101, 2057, 19379, 26860, 2094, 4981, 2083, 1996, 4624, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:15:51 - INFO - __main__ - f_mu shape : torch.Size([1043, 2]) +06/01/2024 12:15:51 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2]) +06/01/2024 12:15:51 - INFO - __main__ - tensor([[-2.4427, 2.6179], + [-1.8617, 1.9990], + [-1.7362, 1.8366], + ..., + [-3.4044, 3.5965], + [-2.7229, 2.8187], + [-2.1238, 2.1852]], device='cuda:0') +06/01/2024 12:15:51 - INFO - __main__ - tensor([[[ 4.2594, 2.3833], + [ 2.3833, 4.3946]], + + [[ 3.1920, 1.2501], + [ 1.2501, 3.4573]], + + [[ 3.6047, 0.5266], + [ 0.5266, 4.1780]], + + ..., + + [[ 4.9522, 4.3707], + [ 4.3707, 4.9895]], + + [[ 5.7035, 0.4758], + [ 0.4758, 6.6389]], + + [[ 7.4642, -3.3615], + [-3.3615, 8.7533]]], device='cuda:0') +06/01/2024 12:15:51 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:15:55 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:15:55 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:15:56 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:15:57 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/adapter_config.json +06/01/2024 12:15:57 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'. +06/01/2024 12:15:57 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_adapter.bin +06/01/2024 12:15:57 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/head_config.json +06/01/2024 12:15:57 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:15:57 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_model_head.bin +06/01/2024 12:15:57 - INFO - __main__ - Adapter Name = cola +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight +06/01/2024 12:15:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias +06/01/2024 12:15:57 - INFO - __main__ - heads.cola.1.weight +06/01/2024 12:15:57 - INFO - __main__ - heads.cola.1.bias +06/01/2024 12:15:57 - INFO - __main__ - heads.cola.4.weight +06/01/2024 12:15:57 - INFO - __main__ - heads.cola.4.bias +06/01/2024 12:16:02 - INFO - __main__ - Sample 3397 of the training set: {'input_ids': [101, 9901, 13303, 2044, 1996, 12383, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:16:02 - INFO - __main__ - Sample 2366 of the training set: {'input_ids': [101, 1996, 3586, 3631, 10647, 1005, 1055, 12277, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:16:02 - INFO - __main__ - Sample 2356 of the training set: {'input_ids': [101, 2057, 19379, 26860, 2094, 4981, 2083, 1996, 4624, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:21:09 - INFO - __main__ - f_mu shape : torch.Size([1043, 2]) +06/01/2024 12:21:09 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2]) +06/01/2024 12:21:09 - INFO - __main__ - tensor([[-2.7018, 2.9278], + [-2.1563, 2.3556], + [-1.7670, 1.8868], + ..., + [-3.7648, 3.9964], + [-2.8124, 2.9454], + [-1.9931, 2.0436]], device='cuda:0') +06/01/2024 12:21:09 - INFO - __main__ - tensor([[[ 4.7950, 1.9634], + [ 1.9634, 4.9622]], + + [[ 3.6191, 1.1228], + [ 1.1228, 3.9051]], + + [[ 4.1466, -0.2768], + [-0.2768, 4.9214]], + + ..., + + [[ 5.2033, 4.4562], + [ 4.4562, 5.2588]], + + [[ 6.5369, -0.6365], + [-0.6365, 7.5989]], + + [[ 9.1744, -5.8993], + [-5.8993, 10.9482]]], device='cuda:0') +06/01/2024 12:21:09 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:21:14 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:21:14 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:21:16 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:21:16 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/adapter_config.json +06/01/2024 12:21:16 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'. +06/01/2024 12:21:16 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_adapter.bin +06/01/2024 12:21:16 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/head_config.json +06/01/2024 12:21:16 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:21:16 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_model_head.bin +06/01/2024 12:21:16 - INFO - __main__ - Adapter Name = cola +06/01/2024 12:21:16 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:16 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:16 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:16 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:16 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight +06/01/2024 12:21:17 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias +06/01/2024 12:21:17 - INFO - __main__ - heads.cola.1.weight +06/01/2024 12:21:17 - INFO - __main__ - heads.cola.1.bias +06/01/2024 12:21:17 - INFO - __main__ - heads.cola.4.weight +06/01/2024 12:21:17 - INFO - __main__ - heads.cola.4.bias +06/01/2024 12:21:20 - INFO - __main__ - Sample 3397 of the training set: {'input_ids': [101, 9901, 13303, 2044, 1996, 12383, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:21:20 - INFO - __main__ - Sample 2366 of the training set: {'input_ids': [101, 1996, 3586, 3631, 10647, 1005, 1055, 12277, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:21:20 - INFO - __main__ - Sample 2356 of the training set: {'input_ids': [101, 2057, 19379, 26860, 2094, 4981, 2083, 1996, 4624, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:26:23 - INFO - __main__ - f_mu shape : torch.Size([1043, 2]) +06/01/2024 12:26:23 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2]) +06/01/2024 12:26:23 - INFO - __main__ - tensor([[-2.9716, 3.2141], + [-2.3593, 2.5730], + [-1.9422, 2.0817], + ..., + [-3.9324, 4.1705], + [-3.0374, 3.1900], + [-1.5928, 1.5935]], device='cuda:0') +06/01/2024 12:26:23 - INFO - __main__ - tensor([[[ 4.9167, 2.1319], + [ 2.1319, 5.0604]], + + [[ 3.8028, 1.1293], + [ 1.1293, 4.0759]], + + [[ 4.5062, -0.5867], + [-0.5867, 5.3611]], + + ..., + + [[ 5.2096, 4.3945], + [ 4.3945, 5.2706]], + + [[ 6.9878, -0.8966], + [-0.8966, 8.0646]], + + [[ 9.9758, -7.6343], + [-7.6343, 11.9324]]], device='cuda:0') +06/01/2024 12:26:23 - INFO - __main__ - ***** Completed training ***** diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..0cdc7ef6298e6b1345a6d22013ede2b8db4fb730 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa753ec8c196b6ba25d397655827b50ba5e4e8ab3a0611696974cb2b176b709 +size 51 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..d6a7633698d67a52cb956e0e4975b69b037d48d0 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54df68e9df6ab9cdfff8670128c4ae7855dd74634433c34c8bba67be1e7133fe +size 175602 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..66c035b7838fe4daff800a1a78f603675e8933cd --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e66d4b04c437fd3ccaffc68e04f9fd2e618372223261826928f3b73f44e7c0 +size 6119 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..0ac8a31c88a113d918c5419b25ac17199b92bc56 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e4aa22e380a680fd4a19f99c0eb816c1aae5d95049578a40b736bc9e748b8a +size 49 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..481db9772ebc5d0908c663db1aeb0750ca8902de --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311348bd7650a8f3a355048163b4f943fb66e1e2f0dd3bb51fdeaf84ef29eef8 +size 176263 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..c4c547f6188e6b85976819840ba768d94bc30e25 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c033519eef70ba83c5d56b44f0bbce32998e594d87922adfc87a047d5afdd226 +size 6136 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..937e4a3c568390daf7784516aea67ed00d3b11f6 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12595e64dabbe54a88115349f4e3a6060d2960b905e840e2f4f7adc226c3dacf +size 49 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..29c12f88afb08b2894fdeeb3ccd50b610c9e4fd0 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c6f66a36c141b6cf55345c06acc811085e79f70323c696b9b4ef8c8a66fc26 +size 176796 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..ee073c2f0dc39ae131b5dbb9d030aa36c75b4cb2 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf247548110d6eb743395d6cc995cf4dc124733f6fd2c09e0c22d7d4e360904 +size 6144 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..4b991068cd6e4fa75e4cb43ed16b546ef93ca93c --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f65b5125b231f4c170f931737ff92430d26d6c425fac07c8e6bd7e8f6b82e45 +size 49 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..79603458d26662bff81d9c2f2db7bd9965a416cf --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2207108a665da35372314f7bf8dcb6793f4632cfa96a3ffb5f38571c3194d27 +size 176990 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..0995c6e0d427afc0cf4a253e33eca57b4469e4b9 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7a3635831442acc4ebfce084fc82ac522911c3c939c9bcc15e2d2872d3c19d +size 6155 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..a8f07457e973bdf9171f307b2d5d5a54d01f599b --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f583a0ab2f642aa1ff56b5493e1e6845d5bd4d6440091a8dbd30c69223fcd290 +size 49 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..4bdb123bbcb060a8c02f595fab41edee4e3d260d --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e004f76425a6dcabe795fa4f58e43f8473366b1d64aeba5826173b3c97e16bc8 +size 177233 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..15669e423c07a724c10ff8ba20b621978d98cbdb --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949998cdd85f8fe1da2e099e65f3d64761bdf65f9ccb3bac58ebcb7985b18898 +size 6162 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..59802f8d2058fe4b95ef2563a627c74903e3596f --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4349923e2abe2a9a9e0aec6228bc19283b7531532d5e3089dfb7d93b460c85dd +size 49 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..c0de2cd57892132c67a4dcf4aacff9f3b6d08e8e --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e72b1e1981b619c147da31387e7eddaeeceb3eacb4d3e42ecaf3e0c5325352 +size 177446 diff --git a/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..62c859a9a7ce24d9d3f822276c1930960ac09fc7 --- /dev/null +++ b/outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0360d311ca735a14bfd39cf6b4fdfdfd9198c0da9f6fe6b494b049d612c4bf +size 6169 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..ff543bc0ddde623db234766708f53d069af98144 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,7 @@ +06/01/2024 11:25:34 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:25:34 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:25:35 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:25:36 - INFO - adapters.utils - Attempting to load adapter from source 'hf'... +06/01/2024 11:25:36 - INFO - adapters.utils - Repo id must be in the form 'repo_name' or 'namespace/repo_name': './outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0'. Use `repo_type` argument if needed. +06/01/2024 11:25:36 - INFO - adapters.utils - Attempting to load adapter from source 'ah'... +06/01/2024 11:25:36 - INFO - adapters.utils - No adapter with name './outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0' was found in the adapter index. diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..4497a79df2fa24e99ab93877b7b5b23e5b9cf01d --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,3258 @@ +06/01/2024 11:38:00 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:38:00 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:38:01 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:38:02 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 11:38:02 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 11:38:02 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 11:38:02 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 11:38:02 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:38:02 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 11:38:02 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:38:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:38:02 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 11:38:02 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 11:38:02 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 11:38:02 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 11:38:09 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [101, 1000, 5262, 1010, 4238, 2323, 2022, 2404, 2006, 5060, 2008, 4073, 2000, 3046, 2000, 12661, 5712, 1999, 2037, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102, 1000, 4238, 2323, 2022, 2006, 5060, 2008, 4740, 2000, 12661, 5712, 1999, 4238, 1005, 1055, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:38:09 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [101, 3962, 2751, 2001, 9339, 2012, 1002, 4029, 2581, 1012, 3938, 1013, 4029, 2620, 1012, 3438, 2019, 19471, 2012, 6694, 13938, 2102, 1010, 2383, 9847, 2039, 2000, 1002, 4029, 2683, 1012, 2753, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2184, 1012, 102, 3962, 2751, 2001, 9339, 2012, 1002, 3486, 2620, 1012, 3515, 1013, 3486, 2683, 1012, 2321, 2019, 19471, 2012, 28714, 2692, 13938, 2102, 1010, 2383, 14051, 2004, 2152, 2004, 1002, 3486, 2683, 1012, 2423, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2423, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 11:38:09 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [101, 12411, 2015, 1012, 2198, 11260, 1998, 3960, 5846, 6430, 29492, 2000, 3713, 1012, 102, 1996, 2053, 1011, 3065, 2020, 12411, 2015, 1012, 2198, 11260, 1997, 4404, 1998, 3960, 5846, 1997, 3516, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:40:31 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 11:40:31 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 11:40:31 - INFO - __main__ - tensor([[ 4.3619e-02, 3.6406e-02], + [ 6.4158e-02, -1.4321e-02], + [ 6.9159e-02, -2.6484e-02], + [ 7.6602e-02, 1.4325e-02], + [ 7.3146e-02, -4.5629e-02], + [ 1.7703e-02, 4.4721e-02], + [ 2.5790e-02, -1.4651e-02], + [ 7.6150e-02, -3.7050e-02], + [ 7.5941e-02, -5.9577e-02], + [ 7.4646e-02, -3.6811e-02], + [ 6.1707e-02, -6.5744e-02], + [ 7.8509e-02, -7.9460e-02], + [ 4.7446e-02, -6.6688e-02], + [ 5.7910e-03, -5.1893e-02], + [ 1.2046e-01, -6.3593e-02], + [ 1.9677e-02, -1.0048e-01], + [ 5.3397e-02, -4.7096e-02], + [ 2.4392e-02, 1.1803e-02], + [ 1.0346e-01, -6.0021e-02], + [ 9.7695e-02, -9.4281e-02], + [-3.8722e-02, 7.7105e-02], + [ 8.2233e-02, -1.3620e-01], + [-1.4177e-02, -4.0608e-02], + [ 1.0596e-01, 4.9701e-02], + [ 3.8174e-02, 9.0166e-02], + [-3.3908e-02, -1.0429e-01], + [ 9.0445e-02, -6.4691e-02], + [ 9.5043e-02, -2.0591e-02], + [ 9.3471e-02, 6.6194e-02], + [ 4.3644e-02, -1.7567e-02], + [ 1.9046e-01, -2.0973e-02], + [ 1.5003e-01, -6.0300e-02], + [ 4.8512e-02, 4.3873e-03], + [-4.4670e-03, -6.3922e-02], + [ 1.0752e-01, 2.3950e-02], + [ 9.6233e-02, -3.5892e-02], + [-1.9231e-02, 5.0670e-02], + [ 4.0359e-02, -5.1438e-02], + [ 5.0531e-02, 2.8183e-02], + [ 6.9121e-02, -3.8421e-02], + [-2.4218e-02, -1.4647e-01], + [-4.8438e-03, -3.2961e-02], + [ 2.1912e-02, -5.6569e-03], + [ 7.0839e-02, -5.4937e-02], + [ 3.1783e-02, -5.1335e-02], + [ 1.2793e-01, -3.8822e-02], + [ 4.1037e-02, -3.3817e-02], + [ 2.4343e-02, 3.0233e-02], + [ 3.9356e-02, 3.8689e-03], + [ 2.8010e-02, 7.6398e-02], + [ 6.4698e-02, -1.0903e-01], + [ 1.8271e-02, -1.4894e-01], + [ 7.0187e-02, -1.2870e-01], + [ 4.9394e-02, -5.5565e-03], + [ 5.4633e-02, 1.6434e-02], + [ 1.0537e-01, -2.4554e-02], + [ 1.5233e-01, 3.0678e-03], + [ 2.5184e-02, -9.4985e-02], + [ 2.2223e-02, -6.2373e-02], + [ 3.3317e-02, -1.0342e-01], + [-5.3489e-02, -6.5831e-02], + [ 2.2368e-02, 3.0225e-02], + [ 7.3287e-02, -3.7810e-02], + [ 3.2924e-02, -1.0642e-01], + [ 4.8612e-02, -7.3861e-03], + [ 1.9567e-02, -2.4926e-02], + [ 9.7314e-02, -9.0506e-02], + [ 1.2048e-01, -6.2819e-02], + [ 5.8020e-02, -4.4150e-02], + [ 1.0669e-01, -7.4518e-02], + [ 5.5153e-02, -4.1653e-02], + [ 2.4278e-01, 3.3665e-02], + [ 2.3360e-02, -1.4883e-01], + [ 2.3484e-03, -3.2870e-02], + [ 1.3485e-02, -5.5986e-02], + [ 2.0594e-02, 4.8492e-02], + [-9.5645e-03, -1.2440e-01], + [ 4.5357e-02, -7.4476e-02], + [ 4.6749e-02, -8.0176e-02], + [ 1.5180e-02, 3.0797e-02], + [ 4.8683e-02, 8.4683e-02], + [ 6.3108e-02, 5.1317e-03], + [ 4.3720e-02, -3.2016e-02], + [-1.8525e-02, -5.8418e-02], + [ 4.2546e-02, -1.1878e-01], + [ 9.9955e-02, -2.4518e-02], + [ 1.5054e-01, 1.3931e-02], + [ 1.5877e-03, -8.2815e-02], + [-6.1991e-02, 1.1436e-01], + [ 1.0354e-01, -8.4428e-02], + [ 5.7084e-02, -2.5690e-02], + [ 1.6396e-02, -5.6194e-02], + [ 4.2302e-02, 9.3638e-03], + [ 7.2770e-02, 1.9010e-02], + [-2.4586e-03, -1.5799e-02], + [ 2.1569e-02, -3.6001e-02], + [ 9.1315e-03, -9.4249e-02], + [ 6.6994e-02, -2.4206e-03], + [ 8.8732e-02, -6.0795e-02], + [-1.5377e-02, -8.5606e-02], + [ 1.3993e-01, -7.5858e-02], + [-4.1973e-02, -1.1683e-01], + [ 4.2117e-02, -8.4780e-02], + [ 4.9305e-02, -8.9579e-03], + [ 1.0425e-01, -3.4364e-02], + [ 7.6529e-02, -1.5942e-02], + [ 5.8913e-02, -2.3002e-02], + [-9.5656e-02, -7.2543e-02], + [-1.5193e-03, -6.7502e-02], + [ 1.0787e-01, -6.1545e-03], + [ 3.3755e-02, -9.9676e-02], + [ 1.2630e-02, 8.8511e-02], + [ 1.0622e-01, -7.2748e-03], + [ 7.8446e-02, -6.7150e-04], + [-8.3213e-03, 2.7228e-03], + [ 3.4771e-02, 8.2124e-02], + [ 3.1440e-02, -7.1212e-02], + [ 8.9207e-02, -4.1815e-02], + [ 7.5659e-02, -9.6401e-02], + [ 7.5079e-02, -5.8172e-02], + [ 1.3996e-01, -1.3917e-02], + [ 8.1245e-02, -5.3766e-02], + [ 9.7681e-02, -7.7481e-02], + [ 6.4096e-02, -5.8440e-02], + [-7.8498e-02, 3.2780e-02], + [ 7.5405e-02, -6.5359e-02], + [ 1.3074e-01, 2.0548e-03], + [ 1.9771e-03, -7.6088e-02], + [ 3.4297e-02, -7.3234e-02], + [ 2.2704e-02, -5.4310e-03], + [ 7.6857e-02, -6.7183e-02], + [ 3.6129e-02, -4.6962e-02], + [-5.2363e-02, -1.6915e-01], + [ 9.0351e-02, -7.4161e-02], + [ 8.7731e-02, -6.9412e-02], + [ 7.2449e-02, -8.1968e-02], + [ 1.5406e-01, -5.0303e-02], + [ 1.0626e-01, -6.7767e-03], + [ 4.0570e-02, 1.4945e-02], + [ 9.3621e-02, -1.6997e-02], + [ 8.7036e-02, -2.6342e-02], + [ 9.0456e-02, -1.1057e-02], + [-6.3116e-02, 5.1270e-02], + [ 6.7507e-02, -9.1216e-02], + [ 8.7095e-02, -1.3862e-01], + [-2.5120e-02, 5.4407e-03], + [ 7.2024e-02, -8.2200e-02], + [ 3.1998e-02, -9.0684e-02], + [ 5.8767e-02, -8.2759e-02], + [ 4.3205e-03, -2.6432e-02], + [ 9.2423e-02, -5.7002e-02], + [ 6.7298e-02, -4.3856e-02], + [ 5.6149e-02, -1.4704e-02], + [ 2.5967e-02, -1.5793e-02], + [ 1.2195e-02, -5.7725e-02], + [ 1.0714e-01, -5.6831e-02], + [ 1.8538e-02, -1.0212e-01], + [ 5.3905e-02, -3.1752e-02], + [ 7.7398e-02, -7.2131e-02], + [ 6.5927e-02, -6.0162e-02], + [-2.0182e-02, 2.4014e-02], + [ 1.0050e-01, -1.3579e-01], + [ 1.1665e-01, -8.8029e-02], + [ 5.5680e-02, -2.4952e-02], + [ 1.4098e-01, -4.2428e-02], + [ 9.3014e-02, -6.4238e-02], + [-4.1881e-02, -6.4491e-02], + [ 1.5052e-02, -1.0118e-01], + [ 5.9035e-02, 2.6372e-03], + [ 6.9535e-02, -3.6275e-02], + [ 4.7671e-02, -5.5103e-03], + [-8.6096e-03, 6.6223e-02], + [ 2.7130e-02, -2.6281e-02], + [ 1.2373e-01, -2.7845e-02], + [ 3.5376e-02, -6.6265e-02], + [ 1.2267e-02, -6.9943e-03], + [ 3.0314e-02, -3.1384e-02], + [-9.9602e-03, -4.0641e-02], + [ 2.5850e-02, 6.9039e-02], + [ 9.5426e-02, -2.6563e-02], + [-2.0618e-02, -2.4742e-02], + [ 6.8333e-02, -1.9283e-01], + [ 1.0771e-01, -5.6405e-02], + [ 7.3956e-02, 4.3962e-04], + [ 1.2150e-01, -5.6749e-02], + [ 9.3508e-02, -6.4907e-02], + [ 9.1338e-02, -1.1445e-01], + [ 2.4685e-02, -2.1142e-02], + [ 6.9704e-02, -7.5265e-02], + [ 3.8533e-02, -7.5783e-02], + [ 1.0852e-01, -3.5450e-02], + [ 5.4030e-02, 3.3229e-02], + [ 5.0454e-02, 7.7015e-03], + [ 9.8760e-02, 1.8111e-02], + [-3.6351e-02, -5.6590e-02], + [ 1.2425e-01, -5.0953e-02], + [ 1.0582e-01, -3.9219e-02], + [ 3.8884e-02, -3.7705e-02], + [ 6.9490e-02, -5.7704e-03], + [ 1.4301e-01, 8.2685e-03], + [ 1.0678e-01, -7.9422e-02], + [ 3.6097e-02, -6.9736e-04], + [ 1.2401e-02, 3.1163e-03], + [ 1.7207e-02, -8.0679e-03], + [ 4.0866e-02, -1.0021e-01], + [ 7.8790e-02, -2.1350e-02], + [-5.8284e-03, -2.8642e-02], + [-4.7759e-02, 1.0824e-01], + [ 5.5798e-02, -1.1131e-01], + [ 7.0790e-02, -2.3227e-02], + [ 1.1566e-01, -6.1746e-03], + [-5.7937e-03, -1.0177e-01], + [ 1.0355e-01, -4.8468e-02], + [ 1.1989e-01, -5.3280e-03], + [ 2.6571e-02, 1.4879e-02], + [ 6.9096e-02, 1.7275e-02], + [ 1.2873e-01, -6.5240e-02], + [ 8.7655e-02, -4.2629e-02], + [-1.1809e-02, -1.7440e-01], + [ 9.6182e-02, -1.1778e-01], + [ 1.3035e-01, -2.3604e-02], + [ 6.1283e-02, -5.5734e-02], + [ 7.8355e-02, -4.0459e-02], + [ 5.4197e-02, -1.2233e-01], + [ 9.8008e-02, 8.2972e-03], + [ 9.2759e-02, -3.2040e-02], + [ 3.6599e-02, -1.8900e-02], + [ 1.0158e-01, -1.3503e-01], + [ 7.4008e-02, -6.6209e-02], + [ 5.5539e-02, -1.9367e-02], + [ 6.3008e-02, -5.1223e-02], + [-6.2546e-03, -7.0841e-02], + [ 3.8766e-02, -4.9786e-02], + [ 8.1548e-02, -4.1715e-02], + [ 5.3011e-02, 5.8142e-03], + [-3.3306e-02, -7.4470e-02], + [-3.0683e-04, -7.7412e-02], + [ 1.1375e-01, -1.0333e-02], + [ 1.2554e-01, -3.5708e-02], + [ 4.5730e-02, -8.1131e-04], + [ 7.6101e-02, -1.4771e-01], + [ 1.7617e-02, -7.7165e-02], + [ 4.3329e-02, -4.6143e-02], + [ 2.8608e-02, 2.0111e-02], + [ 5.1260e-02, -5.9473e-02], + [ 5.1394e-02, -9.5788e-02], + [-6.9449e-03, -1.0472e-01], + [-1.6991e-03, -1.6443e-02], + [ 5.1137e-02, -4.5878e-02], + [ 3.4903e-03, -3.9384e-02], + [ 9.7466e-02, 1.6613e-02], + [-1.3290e-04, 9.4565e-02], + [-2.5159e-02, -3.1226e-02], + [ 5.8335e-02, -1.0250e-01], + [ 2.4796e-02, -2.7224e-02], + [ 1.7937e-02, -4.3980e-02], + [ 1.8546e-02, 8.8767e-03], + [-2.0746e-02, -2.7994e-02], + [ 1.5746e-01, -3.3393e-03], + [ 2.5362e-02, -4.4324e-02], + [-1.1016e-02, 5.0655e-02], + [ 2.5525e-02, -8.7409e-02], + [ 4.1272e-02, -6.6072e-02], + [ 6.6541e-03, 3.9000e-02], + [ 1.4633e-01, 4.1802e-03], + [-2.1641e-03, -5.4232e-02], + [ 5.1968e-02, 3.3756e-02], + [-4.8428e-02, 2.9379e-03], + [ 8.8593e-02, -7.2109e-02], + [ 2.8873e-02, -6.1094e-03], + [ 7.0286e-02, 4.1505e-03], + [ 2.7526e-02, 1.5747e-02], + [-2.1173e-02, -1.2636e-02], + [ 4.9628e-02, -4.3785e-02], + [ 9.6194e-02, -6.4138e-02], + [ 1.4225e-01, 2.3148e-02], + [-2.1227e-02, 4.6929e-02], + [ 4.8589e-02, 2.4404e-02], + [ 2.0000e-02, -9.6059e-02], + [-2.4722e-02, -9.3270e-02], + [ 1.2138e-01, -2.3548e-02], + [ 9.1991e-02, -7.6451e-02], + [ 4.9352e-02, -6.5925e-02], + [-3.0144e-03, -7.6398e-03], + [ 6.4946e-02, -9.3123e-02], + [ 3.5018e-02, 1.6633e-02], + [ 8.5024e-02, -6.3271e-02], + [ 4.7530e-02, 2.0777e-02], + [ 8.8134e-02, -9.2261e-02], + [ 2.5119e-02, -1.0759e-01], + [-1.6002e-03, 8.2385e-02], + [ 4.8289e-02, -3.2412e-02], + [-1.4561e-02, -9.0289e-02], + [ 8.7037e-02, -5.9223e-02], + [-5.4755e-02, -1.8045e-02], + [-6.0445e-02, 1.9310e-02], + [ 2.7725e-02, -5.5796e-02], + [ 9.2170e-03, 7.5185e-03], + [-2.1502e-02, -5.3850e-02], + [-3.8346e-02, 3.0872e-02], + [ 2.0657e-02, -3.3770e-02], + [ 6.9860e-02, -2.4693e-02], + [-1.3221e-02, 3.8244e-02], + [ 1.4191e-01, 2.1350e-02], + [-2.6829e-02, -5.1098e-02], + [ 9.2475e-02, -2.9032e-02], + [ 4.8527e-03, 2.6168e-02], + [ 5.0084e-02, -5.7757e-03], + [ 1.3982e-02, -2.4558e-02], + [ 1.7360e-01, -1.0858e-02], + [ 6.8405e-02, -3.7700e-02], + [-4.1107e-03, 4.5344e-02], + [ 3.5873e-02, -6.8130e-02], + [ 2.0969e-02, -9.4178e-02], + [ 5.5317e-02, -3.5624e-02], + [ 1.2849e-01, -6.3230e-02], + [ 3.2614e-02, -5.0559e-02], + [ 7.9239e-02, -1.2059e-01], + [-1.8250e-02, 7.0185e-03], + [ 1.2272e-02, 3.8380e-02], + [ 4.8014e-02, 1.0617e-01], + [ 1.1757e-01, 8.4055e-02], + [-2.8417e-03, -5.6312e-02], + [ 1.1052e-01, -2.3222e-03], + [ 4.8372e-02, -4.9991e-02], + [ 1.1141e-02, -4.4320e-03], + [ 2.6562e-02, -5.1109e-03], + [ 1.1860e-01, 2.2663e-02], + [ 3.8292e-02, 2.6550e-02], + [ 4.3729e-02, -7.6476e-02], + [ 6.7613e-02, -1.0543e-01], + [ 9.6913e-02, -2.1169e-02], + [ 5.2962e-02, -7.7915e-02], + [ 5.1090e-02, -8.8784e-02], + [ 1.0482e-01, -7.4869e-02], + [ 4.7130e-02, 3.7699e-02], + [-3.1602e-02, 3.9946e-02], + [ 7.0122e-02, -2.4705e-02], + [ 1.2972e-01, -2.0267e-02], + [ 8.0547e-02, 1.7132e-02], + [ 8.7152e-02, -6.7753e-02], + [ 7.4308e-02, -1.3995e-02], + [ 2.2770e-02, -9.3552e-02], + [ 1.4908e-02, -4.8667e-02], + [ 1.2903e-01, -1.7709e-02], + [ 1.0614e-01, -6.1835e-03], + [ 2.2762e-02, -2.9710e-02], + [-8.5787e-03, -2.1884e-02], + [ 1.2974e-03, -1.1277e-01], + [ 8.0414e-02, -1.4486e-02], + [ 1.9404e-01, -6.9679e-02], + [ 5.0444e-02, -3.2422e-02], + [ 1.4354e-01, 2.4683e-02], + [ 8.8655e-02, -6.2823e-02], + [ 1.2527e-01, -7.1614e-02], + [ 1.4545e-02, 4.6806e-02], + [ 1.4283e-01, -7.1127e-02], + [ 1.5795e-01, 1.0457e-02], + [ 8.0536e-02, -1.6950e-02], + [ 2.8944e-02, 1.3863e-02], + [ 1.3995e-01, -2.4669e-02], + [-3.3785e-03, 1.2475e-02], + [ 2.9219e-02, 1.4842e-02], + [ 4.2929e-02, -3.6724e-02], + [ 2.2358e-02, -9.3353e-02], + [ 1.1346e-01, -3.4184e-02], + [ 6.8978e-02, -1.2230e-03], + [ 1.5268e-02, 7.7305e-03], + [ 1.1290e-03, -6.6546e-02], + [-2.2261e-02, -6.5825e-02], + [ 6.7634e-02, -1.1689e-01], + [ 9.0439e-02, -2.3027e-02], + [-8.7674e-03, -7.8216e-02], + [ 9.2149e-02, -9.2917e-02], + [-1.2312e-02, -1.1869e-01], + [ 5.8126e-02, -8.5119e-03], + [ 4.9872e-02, -7.3259e-02], + [ 2.7653e-02, -7.6897e-02], + [ 8.4472e-03, -4.3210e-02], + [ 1.3063e-01, -4.2329e-02], + [ 6.7662e-02, -8.8011e-02], + [ 2.1148e-02, 9.1221e-03], + [ 4.6650e-02, -2.5197e-02], + [ 9.8866e-02, -1.2805e-01], + [ 1.0514e-01, -7.6708e-02], + [ 1.0594e-01, -9.7416e-02], + [ 7.8640e-02, -4.0948e-02], + [ 4.7401e-03, -3.9827e-02], + [ 4.2988e-02, -8.0165e-02], + [ 1.4310e-01, -1.7900e-02], + [ 2.3904e-02, -1.0336e-01], + [ 4.2694e-02, -2.1572e-02], + [ 1.0950e-01, -1.3200e-02], + [ 6.9645e-02, 3.1929e-03], + [ 3.4049e-02, 1.9807e-02], + [ 3.6387e-02, 3.2661e-03], + [ 6.0490e-02, 3.5406e-02], + [ 1.3914e-02, -4.2947e-02], + [-4.9759e-02, -5.6561e-02], + [ 3.5374e-02, -4.0978e-02], + [ 5.4932e-02, -7.9101e-02], + [ 8.0664e-02, -1.3374e-02], + [ 5.4038e-02, -7.3913e-02], + [ 5.6266e-02, -1.6981e-02], + [ 1.8709e-02, -1.8246e-01], + [ 8.6990e-02, -4.7094e-02], + [ 8.0432e-03, 7.7781e-02], + [ 9.2226e-02, -7.4906e-02]], device='cuda:0') +06/01/2024 11:40:31 - INFO - __main__ - tensor([[[16.0826, 15.6384], + [15.6384, 16.0831]], + + [[15.4924, 14.8310], + [14.8310, 15.4924]], + + [[15.5807, 15.2125], + [15.2125, 15.6383]], + + ..., + + [[16.3047, 15.8002], + [15.8002, 16.2975]], + + [[16.0154, 15.4070], + [15.4070, 16.0603]], + + [[15.9723, 15.2423], + [15.2423, 15.9817]]], device='cuda:0') +06/01/2024 11:40:31 - INFO - __main__ - ***** Completed training ***** +06/01/2024 11:40:37 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:40:37 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:40:38 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:40:38 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/adapter_config.json +06/01/2024 11:40:38 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 11:40:39 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_adapter.bin +06/01/2024 11:40:39 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/head_config.json +06/01/2024 11:40:39 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:40:39 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_model_head.bin +06/01/2024 11:40:39 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:40:39 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:40:39 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 11:40:39 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 11:40:39 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 11:40:39 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 11:40:45 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [101, 1000, 5262, 1010, 4238, 2323, 2022, 2404, 2006, 5060, 2008, 4073, 2000, 3046, 2000, 12661, 5712, 1999, 2037, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102, 1000, 4238, 2323, 2022, 2006, 5060, 2008, 4740, 2000, 12661, 5712, 1999, 4238, 1005, 1055, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:40:45 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [101, 3962, 2751, 2001, 9339, 2012, 1002, 4029, 2581, 1012, 3938, 1013, 4029, 2620, 1012, 3438, 2019, 19471, 2012, 6694, 13938, 2102, 1010, 2383, 9847, 2039, 2000, 1002, 4029, 2683, 1012, 2753, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2184, 1012, 102, 3962, 2751, 2001, 9339, 2012, 1002, 3486, 2620, 1012, 3515, 1013, 3486, 2683, 1012, 2321, 2019, 19471, 2012, 28714, 2692, 13938, 2102, 1010, 2383, 14051, 2004, 2152, 2004, 1002, 3486, 2683, 1012, 2423, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2423, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 11:40:45 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [101, 12411, 2015, 1012, 2198, 11260, 1998, 3960, 5846, 6430, 29492, 2000, 3713, 1012, 102, 1996, 2053, 1011, 3065, 2020, 12411, 2015, 1012, 2198, 11260, 1997, 4404, 1998, 3960, 5846, 1997, 3516, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:43:08 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 11:43:08 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 11:43:08 - INFO - __main__ - tensor([[-3.6482, 3.9539], + [ 1.2714, -1.5069], + [ 0.1529, -0.1332], + [-2.1353, 2.4225], + [ 0.8169, -1.2001], + [-1.9814, 2.0785], + [-3.2965, 3.5268], + [-1.4121, 1.6208], + [-2.9330, 3.2579], + [-2.7371, 3.0084], + [-3.7521, 3.9152], + [ 2.5908, -2.8644], + [ 0.0957, -0.1038], + [-2.0089, 2.2228], + [-2.1794, 2.3240], + [-2.2931, 2.1602], + [-3.2345, 3.5909], + [-0.5519, 0.6938], + [-2.6449, 2.9476], + [ 0.3888, -0.5172], + [ 1.0474, -1.1366], + [-2.1811, 2.3707], + [-0.6716, 0.5110], + [-2.4034, 2.6301], + [-1.5720, 1.6388], + [ 0.4994, -0.8180], + [-1.3534, 1.3697], + [-3.8788, 4.1003], + [-0.8411, 0.8267], + [-2.5926, 2.8042], + [-0.6380, 0.7336], + [-3.6542, 4.0623], + [-1.3902, 1.2164], + [-2.8431, 3.0722], + [-2.7664, 3.0282], + [-2.0784, 2.2509], + [-0.2011, 0.0721], + [ 1.1892, -1.2960], + [-1.5548, 1.6290], + [-3.9015, 4.1152], + [ 0.3628, -0.9750], + [-2.8698, 3.0985], + [ 1.1742, -1.3078], + [ 0.4569, -0.5204], + [-0.2316, 0.1717], + [-3.2598, 3.5289], + [-2.3873, 2.5428], + [ 1.7879, -1.9975], + [-2.7821, 3.0868], + [-2.1678, 2.5226], + [-1.8795, 1.9626], + [-1.8553, 1.9842], + [-1.9408, 1.8010], + [-3.2660, 3.6804], + [-2.1934, 2.2723], + [-1.3888, 1.5505], + [-1.1865, 1.6902], + [-3.7211, 3.8818], + [-3.4240, 3.7289], + [-3.0793, 3.1593], + [-2.0262, 1.9146], + [-2.8247, 2.7726], + [-2.5195, 2.7847], + [-0.4326, 0.2431], + [-1.8189, 1.9643], + [-0.3648, 0.4705], + [-3.4296, 3.7046], + [-3.1337, 3.3430], + [-0.1328, -0.1285], + [-3.7912, 4.1669], + [-2.2098, 2.3609], + [-1.5910, 2.1625], + [-2.8524, 3.0820], + [-2.9036, 3.1283], + [-1.8966, 2.0315], + [-3.0637, 3.2539], + [-1.5712, 1.4968], + [-3.4346, 3.5469], + [-3.3101, 3.5263], + [-3.6086, 3.7992], + [-0.6672, 0.9061], + [-3.3658, 3.5995], + [-3.2524, 3.6317], + [ 0.4099, -0.7782], + [-1.7323, 1.7522], + [-2.1743, 2.5211], + [-2.2997, 2.4618], + [-0.2397, 0.1698], + [-2.9487, 3.1550], + [-3.2929, 3.7319], + [-2.4469, 2.5110], + [-3.3715, 3.4397], + [-1.8808, 2.1185], + [-1.9102, 1.9541], + [-3.1351, 3.3689], + [-3.3466, 3.6034], + [-0.1234, -0.0502], + [-1.7935, 1.9395], + [-2.4632, 2.4683], + [-2.4542, 2.4326], + [-2.2412, 2.5815], + [-0.3577, 0.2594], + [-2.1205, 2.3773], + [-3.6844, 3.8858], + [-1.1414, 1.1905], + [-2.0204, 2.2186], + [-1.0739, 0.8732], + [ 0.8438, -1.2067], + [ 0.5034, -0.7305], + [-1.7261, 1.9512], + [-2.4399, 2.4394], + [-3.3017, 3.6129], + [-1.6989, 2.0324], + [-3.1782, 3.3959], + [-2.3619, 2.4941], + [-0.5844, 0.6685], + [-3.5436, 3.6749], + [-3.0119, 3.1702], + [-3.4981, 3.6360], + [-3.4510, 3.7386], + [-2.5662, 2.5432], + [-0.4908, 0.7930], + [ 0.7360, -1.2024], + [-2.5657, 2.8397], + [-3.8070, 4.0339], + [-1.9157, 1.9148], + [-2.7485, 3.2205], + [ 1.3781, -1.7946], + [-3.7220, 4.0068], + [-3.5334, 3.8494], + [-0.9939, 1.0070], + [-0.2814, 0.0353], + [-2.3583, 2.1634], + [-0.3843, 0.3344], + [-3.6133, 3.7662], + [-3.0480, 3.2299], + [-0.9360, 1.1374], + [-0.8108, 1.0966], + [-3.3650, 3.6836], + [-3.2961, 3.7996], + [-2.8137, 3.0223], + [-1.3545, 1.2933], + [ 2.1202, -2.2917], + [-1.8080, 1.9380], + [-0.7913, 0.5041], + [-1.6866, 1.6836], + [-3.0415, 3.2955], + [-1.0532, 0.9037], + [-1.4280, 1.3926], + [-1.1469, 1.1306], + [-0.8945, 0.9206], + [-0.9216, 0.8250], + [-0.3051, 0.2749], + [-3.6740, 3.8601], + [-1.4885, 1.5172], + [-2.5026, 2.7698], + [-3.4414, 3.7240], + [-1.9195, 1.8675], + [-1.0173, 0.9356], + [-2.3978, 2.6985], + [-1.7550, 1.9330], + [-3.0171, 3.0932], + [-3.8254, 4.1234], + [-3.4929, 3.7038], + [-3.0805, 3.4005], + [-3.0512, 3.2265], + [-2.0942, 2.1664], + [ 0.1886, -0.3084], + [-1.5478, 1.6761], + [ 0.8160, -1.0750], + [-0.3190, 0.4135], + [-1.0707, 1.1047], + [-1.9182, 2.0387], + [-2.5133, 2.9091], + [-2.0858, 2.1790], + [-3.0288, 3.1730], + [-2.9680, 3.2245], + [ 1.9458, -2.3247], + [-2.7151, 3.2335], + [-3.7970, 4.0869], + [-0.5745, 0.3360], + [-2.5238, 2.2782], + [-3.0855, 3.4687], + [-3.0605, 3.3024], + [-0.9810, 1.2602], + [-3.3958, 3.5385], + [-0.4301, 0.6596], + [-0.6371, 0.3738], + [-0.0576, -0.2918], + [-2.7569, 2.9034], + [-1.8097, 2.0823], + [-1.2826, 1.3549], + [-0.5537, 0.2475], + [-2.9122, 3.4583], + [-2.5209, 2.5856], + [-1.6834, 2.0438], + [-3.1930, 3.5107], + [ 0.1880, -0.3189], + [-2.5966, 2.7311], + [-2.3980, 2.7684], + [-1.7972, 1.8432], + [-2.6092, 2.8644], + [-0.9870, 1.2768], + [-1.6024, 1.7250], + [-3.1678, 3.2419], + [-0.0479, 0.0686], + [-2.1187, 2.3496], + [-1.9597, 2.2198], + [-0.3148, 0.1599], + [-1.6159, 1.9794], + [-0.6016, 0.6860], + [-1.8992, 1.8750], + [-1.5738, 1.8095], + [-0.0893, -0.0319], + [-2.9550, 3.2104], + [-3.0848, 3.2991], + [-2.4363, 2.7098], + [-2.9542, 3.2132], + [ 0.3850, -0.7025], + [-1.7787, 1.8109], + [-2.3998, 2.6752], + [ 0.3196, -0.2652], + [-3.2151, 3.4898], + [ 0.3744, -0.5816], + [-3.2140, 3.4099], + [-2.9860, 3.3813], + [-3.9669, 4.3194], + [-0.5157, 0.9441], + [-2.6992, 2.8726], + [-1.5660, 1.7592], + [-3.3882, 3.7501], + [-2.7607, 2.9443], + [-1.1572, 1.1342], + [ 0.3021, -0.5749], + [ 0.2954, -0.4062], + [-1.2934, 1.0552], + [-2.4360, 2.6054], + [-0.3086, 0.1290], + [-1.9320, 2.1541], + [ 0.1103, -0.2316], + [-2.9046, 2.9939], + [ 0.4723, -0.6069], + [-2.5358, 2.7200], + [-2.0056, 2.2631], + [-3.5547, 3.7642], + [-3.1869, 3.3921], + [-2.1833, 2.1657], + [-1.9932, 2.1463], + [-1.8808, 2.1674], + [-0.5806, 0.6455], + [-1.8072, 1.9702], + [-0.9252, 0.8849], + [-1.5347, 1.4253], + [-0.5662, 0.4836], + [-0.5831, 0.5282], + [ 1.6633, -1.8568], + [-2.2083, 2.3920], + [-2.6642, 2.7816], + [-1.1973, 1.3607], + [ 0.7051, -0.8418], + [-2.0847, 2.2495], + [-1.1207, 1.0125], + [-1.8529, 1.9424], + [-1.6132, 1.7780], + [-1.5539, 1.7923], + [-1.1329, 1.0738], + [-1.0605, 1.3886], + [-0.4798, 0.4251], + [-0.6906, 0.5466], + [-2.5555, 2.8649], + [-1.6955, 1.7637], + [-2.9571, 3.2512], + [-3.2381, 3.3829], + [-3.1676, 3.6032], + [-2.6506, 3.1160], + [-3.3050, 3.7204], + [-3.2189, 3.3780], + [-1.2600, 1.4192], + [-2.5310, 2.6249], + [ 1.5243, -1.8395], + [-2.5988, 2.6142], + [-1.9194, 2.0715], + [-0.9564, 0.9532], + [ 0.0229, -0.0917], + [ 0.0637, -0.2049], + [-2.8092, 3.0482], + [-3.6791, 4.0061], + [-1.7692, 2.1355], + [-2.4848, 2.6752], + [-0.0905, -0.2192], + [-0.7332, 0.7974], + [ 0.9504, -1.0731], + [-3.3192, 3.4670], + [-2.2020, 2.2188], + [-2.2750, 2.2420], + [ 0.0299, -0.1347], + [ 1.0303, -1.2120], + [-1.3079, 1.3272], + [-1.7684, 1.6886], + [ 0.7130, -0.8594], + [-3.1269, 3.5977], + [-3.5142, 3.9223], + [-2.2141, 2.5721], + [-0.4474, 0.3397], + [-2.7628, 2.7803], + [-3.0168, 3.1755], + [ 0.6983, -0.8055], + [-3.0099, 3.2050], + [-0.4449, 0.4094], + [-1.2243, 1.4546], + [-2.3942, 2.6259], + [-3.4758, 3.6130], + [-1.2179, 1.2891], + [ 1.1277, -1.4921], + [-3.7286, 4.0435], + [ 0.0316, -0.0758], + [-0.0736, -0.1382], + [-3.6054, 3.8539], + [ 1.6698, -1.9119], + [-0.8765, 0.7696], + [ 0.1951, -0.1587], + [ 1.0489, -1.2827], + [ 0.3458, -0.3961], + [-0.6282, 0.6627], + [-0.1240, -0.0754], + [-3.7474, 4.1056], + [-1.7484, 1.7143], + [-2.7518, 3.1269], + [-1.2917, 1.3709], + [-1.5118, 1.2843], + [-3.2318, 3.4629], + [-3.6333, 4.0143], + [-0.6156, 0.4026], + [-2.4866, 2.4241], + [-2.6576, 2.9008], + [-3.3306, 3.5127], + [-3.5559, 3.8255], + [-3.8055, 3.9495], + [-0.5978, 0.6117], + [-2.2229, 2.5293], + [-3.1642, 3.2831], + [-3.3993, 3.7514], + [ 0.9731, -1.1885], + [ 0.5549, -0.7508], + [-2.7735, 3.1358], + [-3.6333, 3.9488], + [-2.0953, 2.3401], + [-3.8599, 4.1917], + [-2.3304, 2.5061], + [-3.0559, 3.2503], + [ 0.9991, -1.3721], + [-2.9623, 3.2859], + [-0.6369, 0.7665], + [-1.8852, 2.0533], + [ 1.1816, -1.3391], + [-0.9895, 1.0936], + [-2.5371, 2.7089], + [-0.6511, 0.7637], + [-1.3391, 1.0736], + [-2.5678, 2.8843], + [ 0.8149, -0.9045], + [-3.0830, 3.3246], + [-3.0633, 3.3244], + [-1.3420, 1.1603], + [-2.7132, 2.8072], + [-3.9282, 4.3133], + [-1.9181, 2.3854], + [-1.5626, 1.7421], + [ 0.1559, -0.3074], + [-0.0407, -0.0165], + [-0.3256, 0.3955], + [-3.2942, 3.6234], + [-1.9325, 1.9002], + [-3.1229, 3.3396], + [ 0.0210, -0.2822], + [-0.0969, 0.2004], + [-2.5363, 2.6008], + [ 0.1376, -0.2884], + [-1.7063, 1.6728], + [-3.4940, 3.7202], + [-2.1096, 2.4878], + [-3.0564, 3.2515], + [-0.4411, 0.5586], + [-3.8190, 3.9844], + [-2.3345, 2.2285], + [-2.1409, 2.3052], + [-2.2439, 2.5598], + [-0.9181, 0.8563], + [-2.7053, 2.8981], + [-2.6911, 2.9820], + [-2.9722, 3.2575], + [-1.8183, 1.8838], + [-2.5944, 2.7960], + [-2.7915, 3.0922], + [-1.1437, 1.2124], + [-2.7866, 3.0866], + [-3.8993, 4.2891], + [-2.2451, 2.5150], + [-0.0987, -0.0580], + [-3.3242, 3.5876], + [-0.1001, 0.0205], + [-3.4840, 3.8004], + [-0.9991, 1.2006], + [ 2.7702, -3.0397], + [-0.5956, 0.3825], + [-3.7929, 4.2022], + [-0.4319, 0.5578], + [-1.2671, 1.3257]], device='cuda:0') +06/01/2024 11:43:08 - INFO - __main__ - tensor([[[ 4.5788, 3.4278], + [ 3.4278, 4.5986]], + + [[ 2.8406, 0.6006], + [ 0.6006, 2.9116]], + + [[ 3.0848, -0.1513], + [-0.1513, 3.2920]], + + ..., + + [[ 4.7777, 3.7158], + [ 3.7158, 4.7146]], + + [[ 3.5252, 0.0207], + [ 0.0207, 3.9324]], + + [[ 3.6555, -0.4447], + [-0.4447, 4.0240]]], device='cuda:0') +06/01/2024 11:43:08 - INFO - __main__ - ***** Completed training ***** +06/01/2024 11:43:12 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:43:12 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:43:13 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:43:13 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/adapter_config.json +06/01/2024 11:43:14 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 11:43:14 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_adapter.bin +06/01/2024 11:43:14 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/head_config.json +06/01/2024 11:43:14 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:43:14 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_model_head.bin +06/01/2024 11:43:14 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:43:14 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:43:14 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 11:43:14 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 11:43:14 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 11:43:14 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 11:43:22 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [101, 1000, 5262, 1010, 4238, 2323, 2022, 2404, 2006, 5060, 2008, 4073, 2000, 3046, 2000, 12661, 5712, 1999, 2037, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102, 1000, 4238, 2323, 2022, 2006, 5060, 2008, 4740, 2000, 12661, 5712, 1999, 4238, 1005, 1055, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:43:22 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [101, 3962, 2751, 2001, 9339, 2012, 1002, 4029, 2581, 1012, 3938, 1013, 4029, 2620, 1012, 3438, 2019, 19471, 2012, 6694, 13938, 2102, 1010, 2383, 9847, 2039, 2000, 1002, 4029, 2683, 1012, 2753, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2184, 1012, 102, 3962, 2751, 2001, 9339, 2012, 1002, 3486, 2620, 1012, 3515, 1013, 3486, 2683, 1012, 2321, 2019, 19471, 2012, 28714, 2692, 13938, 2102, 1010, 2383, 14051, 2004, 2152, 2004, 1002, 3486, 2683, 1012, 2423, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2423, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 11:43:22 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [101, 12411, 2015, 1012, 2198, 11260, 1998, 3960, 5846, 6430, 29492, 2000, 3713, 1012, 102, 1996, 2053, 1011, 3065, 2020, 12411, 2015, 1012, 2198, 11260, 1997, 4404, 1998, 3960, 5846, 1997, 3516, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:45:45 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 11:45:45 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 11:45:45 - INFO - __main__ - tensor([[-5.0638e+00, 5.3815e+00], + [ 3.5345e+00, -3.7987e+00], + [ 2.5510e+00, -2.6470e+00], + [-1.5899e+00, 1.8431e+00], + [ 2.5673e+00, -2.9404e+00], + [-3.1044e+00, 3.1672e+00], + [-4.8299e+00, 5.0258e+00], + [-2.9351e+00, 3.2471e+00], + [-3.5175e+00, 3.8602e+00], + [-4.5338e+00, 4.8462e+00], + [-4.9041e+00, 5.0860e+00], + [ 5.4113e+00, -5.6389e+00], + [ 2.3895e+00, -2.5372e+00], + [-2.0964e+00, 2.2840e+00], + [-3.1692e+00, 3.2926e+00], + [-1.4977e+00, 1.2737e+00], + [-4.2091e+00, 4.4992e+00], + [-5.4318e-03, 8.2547e-02], + [-3.4209e+00, 3.6553e+00], + [ 2.4613e+00, -2.6835e+00], + [ 3.1824e+00, -3.2943e+00], + [-2.1484e+00, 2.3183e+00], + [ 1.8307e+00, -2.0127e+00], + [-2.8655e+00, 3.0627e+00], + [ 9.9194e-01, -1.1509e+00], + [ 2.4734e+00, -2.7784e+00], + [-2.0424e+00, 2.1264e+00], + [-4.8503e+00, 5.0206e+00], + [ 1.4518e+00, -1.6531e+00], + [-2.9276e+00, 3.0992e+00], + [ 2.1291e+00, -2.1436e+00], + [-4.5784e+00, 5.0257e+00], + [-1.8504e+00, 1.6845e+00], + [-3.0820e+00, 3.2915e+00], + [-4.1198e+00, 4.3553e+00], + [-2.2354e+00, 2.4211e+00], + [ 2.0507e+00, -2.3095e+00], + [ 4.0288e+00, -4.2134e+00], + [-9.4820e-02, 6.7006e-02], + [-4.8885e+00, 5.0617e+00], + [ 2.8147e+00, -3.3831e+00], + [-4.2792e+00, 4.5887e+00], + [ 3.6375e+00, -3.7492e+00], + [ 2.6799e+00, -2.8174e+00], + [ 1.3924e-01, -2.8597e-01], + [-4.2163e+00, 4.5826e+00], + [-3.0926e+00, 3.3034e+00], + [ 4.2053e+00, -4.4312e+00], + [-3.6406e+00, 3.9240e+00], + [-2.7360e+00, 3.0063e+00], + [-3.0719e+00, 3.1699e+00], + [-2.7123e+00, 2.8205e+00], + [-2.6879e+00, 2.6038e+00], + [-4.3868e+00, 4.8028e+00], + [-3.1621e+00, 3.2501e+00], + [-1.2065e+00, 1.3243e+00], + [-1.4955e+00, 2.0453e+00], + [-4.8316e+00, 4.9075e+00], + [-4.7519e+00, 5.0053e+00], + [-3.4205e+00, 3.4848e+00], + [-4.0446e-02, -2.9004e-01], + [-3.1186e+00, 2.9449e+00], + [-3.6631e+00, 3.8303e+00], + [ 7.0847e-02, -3.4002e-01], + [-2.5027e+00, 2.7293e+00], + [ 6.7577e-01, -6.0819e-01], + [-4.6157e+00, 4.8692e+00], + [-4.2222e+00, 4.4565e+00], + [ 1.6401e+00, -1.9985e+00], + [-4.9008e+00, 5.2454e+00], + [-3.2721e+00, 3.4611e+00], + [-1.8612e-01, 6.2022e-01], + [-2.7669e+00, 2.9484e+00], + [-3.2772e+00, 3.4370e+00], + [-2.3884e+00, 2.4397e+00], + [-4.5428e+00, 4.7204e+00], + [-1.3475e+00, 1.1659e+00], + [-4.8578e+00, 4.9379e+00], + [-4.0937e+00, 4.4212e+00], + [-4.6094e+00, 4.8531e+00], + [-4.0243e-01, 6.8700e-01], + [-4.7123e+00, 4.8806e+00], + [-4.4442e+00, 4.8041e+00], + [ 3.4164e+00, -3.8267e+00], + [-2.3022e+00, 2.3382e+00], + [-4.1052e+00, 4.5566e+00], + [-2.9831e+00, 3.1069e+00], + [-8.9934e-02, -1.6890e-02], + [-4.5033e+00, 4.6926e+00], + [-4.6185e+00, 5.0175e+00], + [-1.7376e+00, 1.7485e+00], + [-4.1855e+00, 4.1954e+00], + [-2.7353e+00, 3.0669e+00], + [-2.8299e+00, 2.8631e+00], + [-4.3882e+00, 4.6322e+00], + [-4.8923e+00, 5.2114e+00], + [ 8.9688e-01, -1.1505e+00], + [-1.3712e+00, 1.3170e+00], + [-3.3517e+00, 3.3794e+00], + [-3.8708e+00, 3.9575e+00], + [-3.3853e+00, 3.7460e+00], + [ 1.1852e+00, -1.4585e+00], + [-3.3842e+00, 3.6589e+00], + [-5.0890e+00, 5.3026e+00], + [ 1.9335e-01, -2.0711e-01], + [-2.2852e+00, 2.3728e+00], + [-7.2358e-02, -1.7532e-01], + [ 3.7163e+00, -4.1431e+00], + [ 1.6224e+00, -1.8636e+00], + [-1.9951e+00, 2.2482e+00], + [-2.6156e+00, 2.4817e+00], + [-3.9002e+00, 4.0463e+00], + [-2.2387e+00, 2.5138e+00], + [-4.1900e+00, 4.4812e+00], + [-1.9511e+00, 1.9778e+00], + [ 1.3340e+00, -1.5456e+00], + [-4.5501e+00, 4.7474e+00], + [-3.7418e+00, 3.9834e+00], + [-4.8254e+00, 4.9729e+00], + [-4.7692e+00, 5.0870e+00], + [-3.5510e+00, 3.5316e+00], + [-9.7154e-01, 1.3066e+00], + [ 2.3457e+00, -2.7861e+00], + [-3.6318e+00, 3.9180e+00], + [-5.0023e+00, 5.1923e+00], + [-2.7392e+00, 2.7770e+00], + [-4.2904e+00, 4.7887e+00], + [ 2.4142e+00, -2.8701e+00], + [-4.8106e+00, 5.0630e+00], + [-4.7303e+00, 4.9766e+00], + [-1.6829e+00, 1.7657e+00], + [ 2.8001e+00, -3.1134e+00], + [-2.0522e+00, 1.6575e+00], + [ 2.0852e+00, -2.1884e+00], + [-4.9141e+00, 5.1100e+00], + [-4.0043e+00, 4.1602e+00], + [-1.0850e+00, 1.2162e+00], + [ 5.2262e-01, -4.2047e-01], + [-4.0363e+00, 4.2413e+00], + [-4.8911e+00, 5.3372e+00], + [-3.8285e+00, 4.0185e+00], + [-1.0010e+00, 8.9627e-01], + [ 4.4391e+00, -4.6315e+00], + [-2.0490e+00, 2.1718e+00], + [-1.2840e+00, 1.0267e+00], + [-4.6493e-01, 3.2288e-01], + [-4.5114e+00, 4.8252e+00], + [-4.3932e-01, 2.1761e-01], + [ 7.5598e-01, -9.3587e-01], + [-3.3772e+00, 3.4814e+00], + [ 1.5455e+00, -1.7042e+00], + [-2.1101e+00, 2.0842e+00], + [-6.6190e-01, 7.0094e-01], + [-4.7928e+00, 5.0022e+00], + [-2.5127e-01, 1.1627e-01], + [-3.6752e+00, 4.0433e+00], + [-5.0607e+00, 5.3576e+00], + [-8.5788e-01, 7.4310e-01], + [ 1.6696e-01, -3.2263e-01], + [-2.7777e+00, 3.0809e+00], + [-2.0695e+00, 2.2868e+00], + [-4.6982e+00, 4.8456e+00], + [-4.8978e+00, 5.1613e+00], + [-4.5970e+00, 4.7574e+00], + [-3.9658e+00, 4.2007e+00], + [-3.6662e+00, 3.8315e+00], + [-2.2508e+00, 2.2381e+00], + [-2.5317e-01, 1.8912e-01], + [-1.5370e+00, 1.6588e+00], + [ 2.2223e+00, -2.5341e+00], + [ 1.1078e+00, -1.0637e+00], + [ 3.4534e-01, -3.4772e-01], + [-1.4476e+00, 1.5710e+00], + [-2.5490e+00, 2.9727e+00], + [-1.6267e+00, 1.6067e+00], + [-4.0103e+00, 4.1959e+00], + [-4.3027e+00, 4.6028e+00], + [ 4.5139e+00, -4.8203e+00], + [-3.6213e+00, 4.1625e+00], + [-4.9474e+00, 5.2309e+00], + [-1.0897e+00, 8.2791e-01], + [-2.9481e+00, 2.6801e+00], + [-3.8585e+00, 4.2163e+00], + [-4.4725e+00, 4.6829e+00], + [-8.7032e-01, 1.1067e+00], + [-4.3520e+00, 4.4933e+00], + [-2.6708e-02, 3.1807e-01], + [ 4.9376e-01, -8.7053e-01], + [ 9.5226e-01, -1.3099e+00], + [-3.6618e+00, 3.8457e+00], + [-1.5810e+00, 1.8957e+00], + [ 2.2400e+00, -2.4131e+00], + [ 2.8348e-01, -5.7897e-01], + [-3.5165e+00, 4.0917e+00], + [-2.9747e+00, 3.0639e+00], + [-2.5390e+00, 3.0051e+00], + [-4.1326e+00, 4.4462e+00], + [ 6.2013e-01, -7.5420e-01], + [-3.5649e+00, 3.7058e+00], + [-3.7165e+00, 4.0817e+00], + [-3.2969e+00, 3.3381e+00], + [-4.3193e+00, 4.6466e+00], + [-3.0599e-01, 5.0403e-01], + [-9.5256e-01, 9.8204e-01], + [-4.5482e+00, 4.6264e+00], + [ 2.7743e+00, -2.9798e+00], + [-3.1239e+00, 3.3457e+00], + [-2.3297e+00, 2.6710e+00], + [ 2.1041e+00, -2.3688e+00], + [-2.4691e+00, 3.0668e+00], + [ 5.1168e-02, 1.4243e-02], + [-1.4368e+00, 1.3709e+00], + [-1.6902e+00, 1.8027e+00], + [ 1.5913e+00, -1.8114e+00], + [-3.9177e+00, 4.2178e+00], + [-4.2923e+00, 4.4834e+00], + [-4.0413e+00, 4.3799e+00], + [-3.9514e+00, 4.1475e+00], + [ 2.4603e+00, -2.8174e+00], + [-8.8419e-01, 7.7229e-01], + [-2.5785e+00, 2.8905e+00], + [ 2.4339e+00, -2.5166e+00], + [-4.0854e+00, 4.4242e+00], + [ 3.5046e+00, -3.7910e+00], + [-2.8938e+00, 2.9454e+00], + [-3.9496e+00, 4.3139e+00], + [-4.8837e+00, 5.2451e+00], + [-5.8862e-01, 1.0308e+00], + [-4.1868e+00, 4.4445e+00], + [-2.5420e+00, 2.8205e+00], + [-4.2328e+00, 4.5583e+00], + [-4.2253e+00, 4.3492e+00], + [ 1.1054e+00, -1.2782e+00], + [ 3.3317e+00, -3.7419e+00], + [ 1.8751e+00, -2.0526e+00], + [-2.2994e+00, 2.2052e+00], + [-4.6693e+00, 4.9899e+00], + [ 1.0133e+00, -1.2663e+00], + [-5.6484e-01, 7.0758e-01], + [ 1.4154e+00, -1.6405e+00], + [-3.8062e+00, 3.9377e+00], + [ 2.7886e+00, -2.9472e+00], + [-2.2572e+00, 2.3044e+00], + [-2.3317e+00, 2.5776e+00], + [-4.7114e+00, 4.9221e+00], + [-4.7917e+00, 5.0866e+00], + [-2.7005e+00, 2.7582e+00], + [-1.6482e+00, 1.7254e+00], + [-2.3828e+00, 2.7066e+00], + [-1.6596e-01, 2.3500e-01], + [-2.0120e+00, 2.1861e+00], + [-1.4967e-01, 7.0490e-02], + [-9.1080e-01, 6.7676e-01], + [-7.0901e-01, 6.6963e-01], + [ 9.3546e-01, -1.0440e+00], + [ 4.1752e+00, -4.4031e+00], + [-2.7480e+00, 2.8994e+00], + [-3.4318e+00, 3.4722e+00], + [-4.2334e-01, 6.0195e-01], + [ 1.7373e+00, -1.8777e+00], + [-1.8009e+00, 1.7532e+00], + [-1.7203e+00, 1.6713e+00], + [-1.3921e+00, 1.3239e+00], + [-1.9915e+00, 2.1561e+00], + [-2.2695e+00, 2.5401e+00], + [-2.4327e-01, 1.0851e-01], + [ 4.3720e-02, 1.4462e-01], + [ 1.4950e+00, -1.6775e+00], + [ 7.9640e-02, -2.8929e-01], + [-4.2766e+00, 4.5710e+00], + [-5.8268e-01, 5.2662e-01], + [-4.3076e+00, 4.6503e+00], + [-4.4570e+00, 4.5915e+00], + [-3.6544e+00, 4.1191e+00], + [-3.2367e+00, 3.7455e+00], + [-4.4567e+00, 4.8049e+00], + [-3.0974e+00, 3.1811e+00], + [-4.9033e-01, 5.7827e-01], + [-3.4402e+00, 3.5252e+00], + [ 4.1169e+00, -4.4989e+00], + [-3.7762e+00, 3.8057e+00], + [-1.6402e+00, 1.6565e+00], + [-1.7848e+00, 1.7997e+00], + [ 2.5968e+00, -2.8325e+00], + [ 2.8869e+00, -3.0481e+00], + [-3.7594e+00, 3.9834e+00], + [-4.9701e+00, 5.2406e+00], + [-2.2926e+00, 2.5951e+00], + [-2.9168e+00, 3.1525e+00], + [-7.8670e-01, 5.3056e-01], + [ 7.4706e-01, -8.2136e-01], + [ 2.5003e+00, -2.6738e+00], + [-4.2035e+00, 4.3299e+00], + [-3.5134e+00, 3.5827e+00], + [-3.3883e+00, 3.3836e+00], + [ 2.7967e+00, -3.0050e+00], + [ 2.7358e+00, -2.9974e+00], + [-8.7604e-01, 8.7150e-01], + [-1.6716e+00, 1.5927e+00], + [ 1.2572e+00, -1.4806e+00], + [-4.1529e+00, 4.5861e+00], + [-4.3132e+00, 4.6863e+00], + [-2.7432e+00, 3.1708e+00], + [ 7.6986e-01, -9.0376e-01], + [-4.2673e+00, 4.3255e+00], + [-3.7926e+00, 3.9463e+00], + [ 2.9719e+00, -3.1469e+00], + [-4.0152e+00, 4.2042e+00], + [ 9.7374e-01, -1.1225e+00], + [-1.8255e-01, 3.1550e-01], + [-3.8425e+00, 4.0570e+00], + [-4.4905e+00, 4.6003e+00], + [-7.7319e-01, 8.4772e-01], + [ 3.1691e+00, -3.5530e+00], + [-4.9458e+00, 5.2793e+00], + [ 8.5665e-01, -9.2502e-01], + [ 1.4071e+00, -1.6431e+00], + [-4.9268e+00, 5.1999e+00], + [ 3.6692e+00, -3.9203e+00], + [ 3.2127e-01, -6.5833e-01], + [ 2.5744e+00, -2.6434e+00], + [ 3.5822e+00, -3.8969e+00], + [ 2.7338e+00, -2.8984e+00], + [ 5.4862e-01, -5.3079e-01], + [ 2.2587e+00, -2.5142e+00], + [-4.9612e+00, 5.3541e+00], + [-9.2738e-01, 7.7489e-01], + [-3.7081e+00, 4.1047e+00], + [-3.3691e+00, 3.6072e+00], + [-1.3948e+00, 1.1001e+00], + [-4.2427e+00, 4.4618e+00], + [-4.9675e+00, 5.3595e+00], + [-6.8942e-01, 5.0739e-01], + [-2.3150e+00, 2.2151e+00], + [-4.3230e+00, 4.6412e+00], + [-4.6183e+00, 4.7631e+00], + [-4.7927e+00, 5.0603e+00], + [-4.9660e+00, 5.0278e+00], + [-6.9058e-01, 7.8015e-01], + [-2.6852e+00, 2.9125e+00], + [-4.2784e+00, 4.4378e+00], + [-4.3029e+00, 4.6284e+00], + [ 2.7134e+00, -2.9489e+00], + [ 3.3557e+00, -3.6074e+00], + [-3.2272e+00, 3.5326e+00], + [-4.8296e+00, 5.1071e+00], + [-2.6177e+00, 2.8950e+00], + [-5.1861e+00, 5.5254e+00], + [-2.3951e+00, 2.5510e+00], + [-3.5234e+00, 3.6811e+00], + [ 2.6858e+00, -3.0712e+00], + [-3.9257e+00, 4.3297e+00], + [-3.3062e-01, 5.9655e-01], + [-2.3269e+00, 2.5106e+00], + [ 4.2638e+00, -4.4435e+00], + [-1.0673e+00, 1.1002e+00], + [-3.3287e+00, 3.5639e+00], + [ 1.4134e+00, -1.4789e+00], + [ 5.1170e-01, -8.9876e-01], + [-4.3550e+00, 4.8318e+00], + [ 2.9822e+00, -3.1535e+00], + [-3.7404e+00, 3.9232e+00], + [-3.2935e+00, 3.5268e+00], + [-2.3489e-01, -6.0951e-02], + [-3.4824e+00, 3.6156e+00], + [-5.1255e+00, 5.4850e+00], + [-2.1359e+00, 2.5941e+00], + [-1.8064e-01, 1.5012e-01], + [ 1.9283e+00, -2.1410e+00], + [ 2.1605e+00, -2.3476e+00], + [-6.8277e-01, 7.0188e-01], + [-4.6530e+00, 4.9061e+00], + [-2.2193e+00, 2.2181e+00], + [-4.1529e+00, 4.3730e+00], + [ 9.2899e-01, -1.2983e+00], + [ 2.0571e+00, -2.1760e+00], + [-3.4618e+00, 3.5366e+00], + [ 2.6583e+00, -2.9356e+00], + [-1.8755e+00, 1.7648e+00], + [-4.5736e+00, 4.8610e+00], + [-2.4220e+00, 2.7352e+00], + [-4.5966e+00, 4.7627e+00], + [ 1.1275e+00, -1.0837e+00], + [-5.0202e+00, 5.2183e+00], + [-2.1283e+00, 1.9806e+00], + [-2.4779e+00, 2.5245e+00], + [-1.0211e+00, 1.1059e+00], + [ 5.7964e-01, -7.8042e-01], + [-3.6731e+00, 3.8753e+00], + [-2.6417e+00, 2.8501e+00], + [-3.5427e+00, 3.8494e+00], + [-2.8376e+00, 2.9754e+00], + [-3.3389e+00, 3.5715e+00], + [-3.6208e+00, 3.8786e+00], + [-9.2618e-01, 9.3254e-01], + [-3.8696e+00, 4.1519e+00], + [-4.9700e+00, 5.3113e+00], + [-2.3889e+00, 2.6083e+00], + [-2.8473e-01, 2.1304e-01], + [-3.8306e+00, 4.1417e+00], + [ 1.7923e+00, -2.0023e+00], + [-5.0484e+00, 5.3642e+00], + [-1.9252e+00, 2.1636e+00], + [ 4.9626e+00, -5.1775e+00], + [ 1.5768e+00, -1.9093e+00], + [-5.1049e+00, 5.5017e+00], + [ 2.6298e+00, -2.7599e+00], + [-1.2545e+00, 1.3116e+00]], device='cuda:0') +06/01/2024 11:45:45 - INFO - __main__ - tensor([[[ 6.6937, 5.0493], + [ 5.0493, 6.5957]], + + [[ 5.7111, 1.1559], + [ 1.1559, 5.6840]], + + [[ 7.6710, -2.8800], + [ -2.8800, 8.1546]], + + ..., + + [[ 6.8520, 5.1457], + [ 5.1457, 6.7309]], + + [[ 8.6616, -3.3820], + [ -3.3820, 9.3568]], + + [[ 13.2267, -10.1457], + [-10.1457, 14.8300]]], device='cuda:0') +06/01/2024 11:45:45 - INFO - __main__ - ***** Completed training ***** +06/01/2024 11:45:49 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:45:49 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:45:50 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:45:51 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/adapter_config.json +06/01/2024 11:45:51 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 11:45:51 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_adapter.bin +06/01/2024 11:45:51 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/head_config.json +06/01/2024 11:45:51 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:45:51 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_model_head.bin +06/01/2024 11:45:51 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:45:51 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:45:51 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 11:45:51 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 11:45:51 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 11:45:51 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 11:45:59 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [101, 1000, 5262, 1010, 4238, 2323, 2022, 2404, 2006, 5060, 2008, 4073, 2000, 3046, 2000, 12661, 5712, 1999, 2037, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102, 1000, 4238, 2323, 2022, 2006, 5060, 2008, 4740, 2000, 12661, 5712, 1999, 4238, 1005, 1055, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:45:59 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [101, 3962, 2751, 2001, 9339, 2012, 1002, 4029, 2581, 1012, 3938, 1013, 4029, 2620, 1012, 3438, 2019, 19471, 2012, 6694, 13938, 2102, 1010, 2383, 9847, 2039, 2000, 1002, 4029, 2683, 1012, 2753, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2184, 1012, 102, 3962, 2751, 2001, 9339, 2012, 1002, 3486, 2620, 1012, 3515, 1013, 3486, 2683, 1012, 2321, 2019, 19471, 2012, 28714, 2692, 13938, 2102, 1010, 2383, 14051, 2004, 2152, 2004, 1002, 3486, 2683, 1012, 2423, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2423, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 11:45:59 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [101, 12411, 2015, 1012, 2198, 11260, 1998, 3960, 5846, 6430, 29492, 2000, 3713, 1012, 102, 1996, 2053, 1011, 3065, 2020, 12411, 2015, 1012, 2198, 11260, 1997, 4404, 1998, 3960, 5846, 1997, 3516, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:48:24 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 11:48:24 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 11:48:24 - INFO - __main__ - tensor([[-6.3273, 6.5760], + [ 5.3661, -5.6171], + [ 3.6309, -3.7107], + [-2.3283, 2.5674], + [ 4.0812, -4.4334], + [-4.1061, 4.1773], + [-6.1976, 6.3324], + [-4.8860, 5.2570], + [-5.6913, 5.9782], + [-5.9602, 6.2424], + [-6.4155, 6.5778], + [ 6.7721, -6.9304], + [ 2.7936, -2.9488], + [-4.4780, 4.6653], + [-5.2878, 5.4724], + [-1.6371, 1.3176], + [-5.3369, 5.5819], + [-0.7589, 0.8674], + [-4.9551, 5.1841], + [ 4.2108, -4.4433], + [ 4.2864, -4.4029], + [-4.2466, 4.5164], + [ 1.7507, -1.8371], + [-4.2084, 4.4476], + [ 1.9266, -2.0477], + [ 3.6588, -3.9567], + [-4.0593, 4.1561], + [-6.2286, 6.3946], + [ 0.5167, -0.6622], + [-5.0150, 5.2446], + [ 3.0647, -3.1234], + [-5.9563, 6.2995], + [-3.2416, 3.0725], + [-5.3438, 5.5424], + [-6.0257, 6.2662], + [-4.5501, 4.7716], + [-0.7467, 0.6654], + [ 5.9303, -6.0926], + [-0.6636, 0.6475], + [-6.1224, 6.3116], + [ 5.0219, -5.4813], + [-6.0661, 6.3523], + [ 5.1808, -5.3406], + [ 3.1262, -3.2527], + [-0.5740, 0.4506], + [-5.9299, 6.2623], + [-4.7730, 4.9762], + [ 6.1500, -6.3283], + [-5.3240, 5.6169], + [-4.1736, 4.4488], + [-5.1070, 5.3122], + [-4.2055, 4.3549], + [-4.9765, 4.9731], + [-6.1226, 6.4954], + [-5.6320, 5.7288], + [-2.9060, 3.0610], + [-4.0964, 4.7076], + [-6.4682, 6.5295], + [-6.1218, 6.3259], + [-5.2506, 5.3559], + [-1.9106, 1.6787], + [-5.2872, 5.2387], + [-5.5988, 5.7899], + [-0.2104, -0.0231], + [-4.5680, 4.8596], + [ 1.5009, -1.4581], + [-6.0336, 6.2505], + [-5.4017, 5.6116], + [ 1.5339, -1.8448], + [-6.4283, 6.7172], + [-5.3288, 5.5511], + [-1.5387, 2.1423], + [-4.8770, 5.0885], + [-4.8818, 5.0946], + [-3.8502, 3.9349], + [-5.9644, 6.1066], + [-2.0199, 1.8392], + [-6.1566, 6.2108], + [-5.9048, 6.1916], + [-6.1862, 6.4113], + [-1.0514, 1.3462], + [-6.2592, 6.4109], + [-6.4885, 6.7909], + [ 5.3549, -5.6623], + [-4.0780, 4.1165], + [-6.1788, 6.5753], + [-4.3808, 4.5650], + [-0.1310, 0.0767], + [-5.8988, 6.0767], + [-5.7786, 6.1164], + [-3.4898, 3.6326], + [-5.9412, 5.9803], + [-3.4994, 3.8401], + [-4.8307, 4.9321], + [-6.3371, 6.5732], + [-6.4357, 6.6611], + [ 2.8975, -3.2176], + [-1.3753, 1.3016], + [-5.0549, 5.1363], + [-5.3293, 5.4232], + [-4.8784, 5.2085], + [ 2.4608, -2.7149], + [-5.1400, 5.4375], + [-6.4974, 6.6806], + [ 1.6602, -1.7671], + [-4.1638, 4.2643], + [-1.0259, 0.9300], + [ 5.8103, -6.1365], + [ 0.3955, -0.5645], + [-3.1453, 3.4731], + [-4.9151, 4.8716], + [-6.1195, 6.2626], + [-4.5805, 4.8378], + [-4.9897, 5.3028], + [-4.0741, 4.2032], + [ 0.9669, -1.1115], + [-6.0025, 6.1885], + [-5.5620, 5.7671], + [-6.2257, 6.3480], + [-6.1057, 6.3838], + [-5.2702, 5.3044], + [-0.3232, 0.6116], + [ 3.8378, -4.2644], + [-5.3533, 5.6634], + [-6.2879, 6.4401], + [-4.0008, 4.0868], + [-5.8432, 6.2744], + [ 4.1491, -4.5451], + [-6.2890, 6.5353], + [-6.1819, 6.3623], + [-2.2131, 2.3052], + [ 4.3229, -4.5866], + [-3.1891, 2.8593], + [ 1.9952, -2.0585], + [-6.5569, 6.7130], + [-5.6075, 5.7986], + [-1.4217, 1.5881], + [ 1.4035, -1.2968], + [-5.5768, 5.7459], + [-6.2973, 6.6282], + [-5.1451, 5.3383], + [-1.7994, 1.7457], + [ 5.9265, -6.1002], + [-2.9474, 3.1071], + [-3.3412, 3.1611], + [-1.6835, 1.6009], + [-6.1052, 6.3721], + [-1.4947, 1.3689], + [-0.7707, 0.7001], + [-4.7910, 4.9068], + [ 0.9678, -1.1101], + [-3.6866, 3.7852], + [-0.6533, 0.7193], + [-6.0895, 6.2967], + [ 2.4366, -2.7394], + [-5.9109, 6.2388], + [-6.5765, 6.8139], + [-1.7520, 1.7462], + [-1.1441, 1.1595], + [-5.0937, 5.4104], + [-3.6765, 3.9526], + [-6.3678, 6.5685], + [-6.3606, 6.5861], + [-6.0285, 6.1481], + [-5.4712, 5.7209], + [-4.6075, 4.7490], + [-3.7149, 3.7421], + [-1.1527, 1.1300], + [-3.8199, 4.0347], + [ 3.8017, -4.1508], + [ 0.5868, -0.4685], + [ 0.9961, -0.9910], + [-3.9779, 4.1605], + [-4.7549, 5.1030], + [-3.6805, 3.7825], + [-5.8195, 6.0141], + [-5.4879, 5.7781], + [ 6.1022, -6.3457], + [-5.2882, 5.7773], + [-6.2536, 6.5271], + [-1.3139, 1.0390], + [-3.8142, 3.6414], + [-5.1124, 5.4632], + [-5.8993, 6.0654], + [-1.1188, 1.3001], + [-5.8130, 5.9426], + [ 0.4569, -0.2437], + [ 0.2858, -0.6326], + [ 1.7318, -2.0594], + [-5.5560, 5.7920], + [-2.7611, 3.2131], + [ 3.3066, -3.4486], + [ 1.6431, -1.9738], + [-5.5743, 6.0762], + [-4.8232, 4.9528], + [-4.3774, 4.8186], + [-5.3634, 5.6591], + [-2.0493, 2.1507], + [-5.3643, 5.5589], + [-5.7033, 6.0587], + [-5.4725, 5.5861], + [-5.2288, 5.5458], + [-2.8369, 3.2100], + [-3.6985, 3.8931], + [-5.8286, 5.9134], + [ 3.0164, -3.1358], + [-4.4431, 4.6502], + [-3.6566, 3.9988], + [ 2.5094, -2.7824], + [-3.6897, 4.2747], + [ 0.0765, -0.0657], + [-2.8275, 2.9294], + [-3.8221, 4.0608], + [ 1.8925, -2.0956], + [-5.6100, 5.8891], + [-5.7607, 5.9439], + [-5.8346, 6.1447], + [-6.2044, 6.3735], + [ 3.9023, -4.2111], + [-2.4545, 2.3772], + [-3.1996, 3.5413], + [ 3.7934, -3.8814], + [-5.6864, 5.9980], + [ 4.9047, -5.1534], + [-4.6025, 4.7179], + [-5.2500, 5.5753], + [-6.2412, 6.5449], + [-0.2554, 0.7246], + [-5.4628, 5.7249], + [-3.0849, 3.3060], + [-6.4067, 6.7231], + [-6.1817, 6.3236], + [-1.2750, 1.2643], + [ 5.5006, -5.8602], + [ 2.7420, -2.9432], + [-3.7713, 3.7775], + [-5.9733, 6.2570], + [ 3.2453, -3.5118], + [-0.4006, 0.4743], + [ 1.9813, -2.1987], + [-5.8083, 5.9802], + [ 3.4238, -3.5557], + [-3.7150, 3.8339], + [-4.6771, 4.9792], + [-6.1163, 6.3086], + [-6.1757, 6.4252], + [-4.3891, 4.5174], + [-2.7916, 2.9139], + [-4.0876, 4.4466], + [ 0.1009, -0.0746], + [-4.1141, 4.3614], + [-1.4143, 1.4394], + [-3.9031, 3.8550], + [-0.6882, 0.6985], + [ 1.2770, -1.4145], + [ 5.4794, -5.6813], + [-5.0609, 5.2649], + [-5.4993, 5.6173], + [-1.5738, 1.8379], + [ 2.1759, -2.3154], + [-3.6859, 3.8172], + [-1.3385, 1.2095], + [-3.9029, 3.9547], + [-3.2872, 3.5166], + [-3.2616, 3.5630], + [-2.2251, 2.2545], + [-1.6235, 1.9166], + [ 2.4720, -2.6390], + [ 0.1020, -0.2821], + [-5.8985, 6.1546], + [-2.7383, 2.8663], + [-5.7488, 6.0406], + [-5.7318, 5.8547], + [-5.0476, 5.4773], + [-4.4254, 4.9073], + [-5.7631, 6.0530], + [-4.9959, 5.0804], + [-0.9408, 1.0922], + [-5.3368, 5.4374], + [ 5.6641, -5.9999], + [-5.5226, 5.5838], + [-3.2450, 3.3458], + [-3.0307, 3.1282], + [ 3.6625, -3.8925], + [ 3.9232, -4.0299], + [-5.3836, 5.6049], + [-6.0307, 6.2781], + [-5.0275, 5.3669], + [-5.0936, 5.3470], + [-2.5532, 2.4494], + [-0.1579, 0.2050], + [ 4.1254, -4.3251], + [-5.7063, 5.8894], + [-5.4862, 5.5982], + [-4.4634, 4.4714], + [ 2.9611, -3.1456], + [ 3.8102, -4.0485], + [-1.2880, 1.3126], + [-2.1928, 2.1304], + [ 1.5598, -1.7384], + [-5.6841, 6.0426], + [-5.7288, 6.0250], + [-4.1532, 4.5475], + [-0.1600, 0.1670], + [-5.7235, 5.8064], + [-5.8212, 6.0213], + [ 4.1067, -4.2649], + [-5.3478, 5.4820], + [ 1.5340, -1.7175], + [-0.8279, 1.0199], + [-5.8555, 6.1289], + [-6.2925, 6.4225], + [-0.0429, 0.0353], + [ 4.7918, -5.1183], + [-6.1084, 6.3690], + [ 1.8677, -1.9635], + [ 2.9775, -3.2048], + [-6.3406, 6.5678], + [ 5.3754, -5.6132], + [ 1.3222, -1.6704], + [ 3.2695, -3.3003], + [ 5.3143, -5.5934], + [ 3.6725, -3.8279], + [ 0.4126, -0.4056], + [ 2.6879, -2.9116], + [-6.2644, 6.5705], + [-0.3914, 0.1854], + [-5.1449, 5.5175], + [-4.1660, 4.3730], + [-1.6124, 1.2452], + [-6.2528, 6.4946], + [-6.2951, 6.6305], + [-2.4010, 2.3515], + [-3.9825, 3.9599], + [-5.4045, 5.6689], + [-6.0278, 6.1341], + [-6.1668, 6.3671], + [-6.4635, 6.4989], + [-1.6597, 1.7282], + [-4.7191, 4.9694], + [-5.6911, 5.8419], + [-6.1878, 6.4142], + [ 4.0492, -4.2955], + [ 4.3241, -4.5209], + [-5.3536, 5.6023], + [-6.2908, 6.5232], + [-4.1297, 4.4215], + [-6.4008, 6.6712], + [-4.1001, 4.2425], + [-5.2063, 5.3972], + [ 4.3959, -4.6939], + [-5.3697, 5.7461], + [-0.1738, 0.4089], + [-3.9937, 4.2003], + [ 6.0892, -6.2964], + [-1.5606, 1.6629], + [-5.2727, 5.5420], + [ 2.5130, -2.6077], + [ 0.1908, -0.5471], + [-5.5168, 5.9444], + [ 3.7861, -3.9183], + [-5.5239, 5.7285], + [-5.0264, 5.3372], + [-0.4217, 0.1953], + [-5.0024, 5.1247], + [-6.2831, 6.5864], + [-3.1880, 3.6496], + [-1.2881, 1.3848], + [ 2.5795, -2.7753], + [ 4.2864, -4.5203], + [-2.6889, 2.8218], + [-5.8485, 6.0493], + [-4.3386, 4.4843], + [-5.7857, 5.9239], + [ 1.6783, -2.0691], + [ 2.2090, -2.3005], + [-4.6843, 4.8318], + [ 4.0204, -4.3095], + [-3.5168, 3.5275], + [-6.0604, 6.3106], + [-4.5396, 4.9183], + [-6.1865, 6.3494], + [ 0.7698, -0.6580], + [-6.2315, 6.3883], + [-3.5874, 3.4599], + [-4.3833, 4.5417], + [-2.9737, 3.1704], + [-0.4719, 0.3472], + [-5.2900, 5.4557], + [-4.7440, 5.0188], + [-5.2014, 5.5652], + [-4.9059, 5.1014], + [-5.0469, 5.3213], + [-5.1331, 5.3882], + [-3.6878, 3.8631], + [-5.5042, 5.7608], + [-6.1877, 6.4672], + [-4.2892, 4.5407], + [-3.3143, 3.4972], + [-4.8980, 5.2241], + [ 1.8165, -2.0359], + [-6.3949, 6.6445], + [-2.6822, 2.9969], + [ 6.3797, -6.5541], + [ 2.1689, -2.4862], + [-6.2670, 6.6050], + [ 3.3467, -3.4678], + [-4.0717, 4.2618]], device='cuda:0') +06/01/2024 11:48:24 - INFO - __main__ - tensor([[[ 10.8332, 8.4007], + [ 8.4007, 10.6750]], + + [[ 10.9738, 3.2750], + [ 3.2750, 10.8684]], + + [[ 25.0666, -17.1530], + [-17.1530, 26.6878]], + + ..., + + [[ 11.0879, 8.0989], + [ 8.0989, 10.8163]], + + [[ 45.3675, -38.6159], + [-38.6158, 49.6258]], + + [[ 27.8143, -17.7233], + [-17.7233, 29.2133]]], device='cuda:0') +06/01/2024 11:48:24 - INFO - __main__ - ***** Completed training ***** +06/01/2024 11:48:28 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:48:28 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:48:29 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:48:30 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/adapter_config.json +06/01/2024 11:48:30 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 11:48:30 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_adapter.bin +06/01/2024 11:48:30 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/head_config.json +06/01/2024 11:48:30 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:48:30 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_model_head.bin +06/01/2024 11:48:30 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:48:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:48:30 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 11:48:30 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 11:48:30 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 11:48:30 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 11:48:38 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [101, 1000, 5262, 1010, 4238, 2323, 2022, 2404, 2006, 5060, 2008, 4073, 2000, 3046, 2000, 12661, 5712, 1999, 2037, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102, 1000, 4238, 2323, 2022, 2006, 5060, 2008, 4740, 2000, 12661, 5712, 1999, 4238, 1005, 1055, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:48:38 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [101, 3962, 2751, 2001, 9339, 2012, 1002, 4029, 2581, 1012, 3938, 1013, 4029, 2620, 1012, 3438, 2019, 19471, 2012, 6694, 13938, 2102, 1010, 2383, 9847, 2039, 2000, 1002, 4029, 2683, 1012, 2753, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2184, 1012, 102, 3962, 2751, 2001, 9339, 2012, 1002, 3486, 2620, 1012, 3515, 1013, 3486, 2683, 1012, 2321, 2019, 19471, 2012, 28714, 2692, 13938, 2102, 1010, 2383, 14051, 2004, 2152, 2004, 1002, 3486, 2683, 1012, 2423, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2423, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 11:48:38 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [101, 12411, 2015, 1012, 2198, 11260, 1998, 3960, 5846, 6430, 29492, 2000, 3713, 1012, 102, 1996, 2053, 1011, 3065, 2020, 12411, 2015, 1012, 2198, 11260, 1997, 4404, 1998, 3960, 5846, 1997, 3516, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:51:00 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 11:51:00 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 11:51:00 - INFO - __main__ - tensor([[-6.7006, 6.9338], + [ 5.3833, -5.6406], + [ 4.0183, -4.1195], + [-2.4421, 2.6537], + [ 4.3460, -4.7140], + [-5.2269, 5.2924], + [-6.6010, 6.7281], + [-5.0795, 5.4336], + [-6.2179, 6.4924], + [-6.4142, 6.6651], + [-6.7589, 6.9270], + [ 6.9078, -7.0572], + [ 1.6694, -1.7845], + [-4.7830, 4.9676], + [-5.8305, 5.9900], + [-4.4274, 4.2778], + [-6.0022, 6.2188], + [-1.3489, 1.4491], + [-5.4342, 5.6797], + [ 3.6056, -3.8304], + [ 4.4298, -4.5421], + [-4.8691, 5.1641], + [ 2.2651, -2.3744], + [-4.7021, 4.9735], + [ 1.7104, -1.8330], + [ 3.2427, -3.5329], + [-4.0933, 4.1490], + [-6.6858, 6.8540], + [ 0.7164, -0.8654], + [-5.3475, 5.5488], + [ 2.1936, -2.2314], + [-6.2836, 6.6289], + [-4.2090, 4.0975], + [-5.8936, 6.0816], + [-6.2831, 6.5266], + [-4.4880, 4.6916], + [-0.1311, 0.0304], + [ 5.7481, -5.8968], + [-1.0996, 1.0960], + [-6.6731, 6.8641], + [ 5.1493, -5.6115], + [-6.6900, 6.9683], + [ 5.2019, -5.3957], + [ 2.6623, -2.7942], + [-1.0869, 0.9509], + [-6.3505, 6.6638], + [-5.3731, 5.5585], + [ 6.4155, -6.5993], + [-5.5512, 5.8127], + [-5.1392, 5.3734], + [-6.0019, 6.2521], + [-5.3090, 5.4811], + [-5.4361, 5.4159], + [-6.5493, 6.9109], + [-6.0604, 6.1606], + [-4.2972, 4.5391], + [-4.7557, 5.3252], + [-6.7606, 6.8205], + [-6.5030, 6.7052], + [-5.5304, 5.6508], + [-3.2585, 3.1209], + [-5.2537, 5.1770], + [-6.2625, 6.4442], + [-2.0890, 1.9639], + [-4.8778, 5.1834], + [ 0.8718, -0.7659], + [-6.4596, 6.6739], + [-5.9842, 6.1974], + [ 1.5781, -1.9127], + [-6.7406, 7.0174], + [-5.9962, 6.2222], + [-1.7261, 2.2967], + [-5.9343, 6.1579], + [-5.4314, 5.6584], + [-4.7203, 4.8377], + [-6.7632, 6.9112], + [-2.2703, 2.0853], + [-6.5137, 6.5654], + [-6.3705, 6.6484], + [-6.6745, 6.9160], + [-0.7986, 1.0502], + [-6.6838, 6.8101], + [-6.8286, 7.1229], + [ 5.1497, -5.4801], + [-5.0946, 5.2039], + [-6.4899, 6.8636], + [-4.2078, 4.3530], + [-0.2935, 0.2420], + [-6.3586, 6.5210], + [-6.2964, 6.6262], + [-4.6463, 4.8188], + [-6.3176, 6.3776], + [-3.7693, 4.1027], + [-5.1094, 5.2131], + [-6.7902, 7.0222], + [-6.9272, 7.1489], + [ 1.0316, -1.2861], + [-3.1463, 3.1175], + [-5.7481, 5.8542], + [-5.8203, 5.9224], + [-5.1078, 5.4395], + [ 2.2446, -2.5045], + [-5.9827, 6.2884], + [-6.8880, 7.0616], + [ 0.5680, -0.6135], + [-4.6111, 4.6851], + [-1.5609, 1.4499], + [ 6.0281, -6.3448], + [ 0.6108, -0.7840], + [-4.1448, 4.5033], + [-5.7512, 5.7470], + [-6.5900, 6.7659], + [-5.4683, 5.7263], + [-5.6182, 5.9418], + [-4.7138, 4.8262], + [ 1.9084, -2.1229], + [-6.4698, 6.6987], + [-6.0751, 6.2758], + [-6.6113, 6.7379], + [-6.5908, 6.8548], + [-5.5346, 5.5865], + [-0.7006, 0.9805], + [ 3.4396, -3.8692], + [-5.8311, 6.1089], + [-6.7873, 6.9491], + [-4.7129, 4.8163], + [-6.2222, 6.6163], + [ 4.1772, -4.6096], + [-6.6194, 6.8737], + [-6.4848, 6.6608], + [-3.1765, 3.2785], + [ 4.7648, -5.0154], + [-3.8676, 3.6388], + [ 1.5924, -1.6774], + [-6.9959, 7.1434], + [-6.2823, 6.4530], + [-3.1352, 3.3740], + [ 0.7611, -0.6378], + [-5.8026, 5.9841], + [-6.7172, 7.0258], + [-5.6324, 5.8331], + [-2.2787, 2.1917], + [ 6.1593, -6.3419], + [-4.0450, 4.2030], + [-3.7367, 3.5921], + [-2.2595, 2.2211], + [-6.3386, 6.5893], + [-1.2430, 1.0943], + [-1.1320, 1.0627], + [-5.6270, 5.7642], + [-0.8631, 0.7568], + [-5.3292, 5.4717], + [-1.1693, 1.2572], + [-6.5969, 6.8003], + [ 1.1558, -1.3669], + [-6.3564, 6.6868], + [-7.0438, 7.2684], + [-3.3815, 3.4119], + [-1.8077, 1.8414], + [-5.4386, 5.7680], + [-4.2321, 4.5209], + [-6.5462, 6.7493], + [-6.6801, 6.8972], + [-6.5652, 6.6828], + [-5.7782, 6.0302], + [-5.7404, 5.8897], + [-4.6096, 4.6488], + [-0.3883, 0.3202], + [-4.2244, 4.4200], + [ 3.1090, -3.4506], + [-0.8011, 1.0672], + [-0.2173, 0.2691], + [-4.0406, 4.1965], + [-4.9958, 5.3522], + [-3.9423, 4.0372], + [-5.7809, 5.9828], + [-6.3093, 6.5694], + [ 6.2748, -6.5280], + [-5.7930, 6.2280], + [-6.5810, 6.8335], + [-3.2831, 3.1293], + [-4.5751, 4.4499], + [-5.8284, 6.1979], + [-6.3414, 6.5101], + [-1.4262, 1.6413], + [-6.1823, 6.3121], + [-0.0450, 0.2887], + [ 1.2505, -1.6346], + [ 2.2194, -2.5556], + [-6.3004, 6.5575], + [-2.7551, 3.2267], + [ 3.6937, -3.8393], + [ 1.1906, -1.4866], + [-5.9838, 6.4562], + [-5.4986, 5.6133], + [-5.1086, 5.5648], + [-5.9823, 6.2717], + [-3.4967, 3.6968], + [-6.1544, 6.3757], + [-6.2709, 6.6119], + [-5.8404, 5.9379], + [-6.0934, 6.3936], + [-2.8148, 3.1541], + [-4.9643, 5.1813], + [-6.2527, 6.3464], + [ 2.5227, -2.6636], + [-5.4232, 5.6314], + [-3.7772, 4.1035], + [ 1.7859, -2.0272], + [-4.4319, 4.9501], + [-0.5119, 0.5227], + [-3.5911, 3.7258], + [-3.6585, 3.8478], + [ 0.8402, -0.9839], + [-6.2238, 6.4852], + [-6.3993, 6.6042], + [-6.2200, 6.5120], + [-6.6001, 6.7669], + [ 3.6678, -3.9911], + [-3.6078, 3.5622], + [-5.1181, 5.4914], + [ 2.5538, -2.5719], + [-6.1383, 6.4345], + [ 4.9489, -5.1991], + [-4.9849, 5.0980], + [-5.8899, 6.1892], + [-6.6068, 6.8982], + [-0.9746, 1.4583], + [-6.0475, 6.3062], + [-4.8686, 5.1577], + [-6.7028, 7.0041], + [-6.6289, 6.7913], + [-1.2122, 1.1591], + [ 5.5729, -5.9269], + [ 2.1750, -2.3304], + [-4.7154, 4.8007], + [-6.6344, 6.8667], + [ 3.2961, -3.5655], + [-1.2191, 1.4049], + [ 1.4827, -1.6617], + [-6.3065, 6.4835], + [ 3.8877, -4.0300], + [-4.8417, 4.9878], + [-5.0395, 5.3234], + [-6.5610, 6.7617], + [-6.5827, 6.8306], + [-4.5111, 4.6755], + [-3.5418, 3.6780], + [-4.4897, 4.8584], + [-2.7939, 3.0327], + [-4.2391, 4.4857], + [-2.1751, 2.2212], + [-3.9412, 3.8835], + [-0.8391, 0.8213], + [ 0.7430, -0.8820], + [ 5.6671, -5.8828], + [-6.1783, 6.4295], + [-5.9248, 6.0508], + [-2.2406, 2.5361], + [ 1.9347, -2.0355], + [-3.9881, 4.1594], + [-3.3257, 3.3393], + [-4.9044, 5.0310], + [-3.4869, 3.7291], + [-4.1423, 4.4313], + [-2.6378, 2.7023], + [-2.2765, 2.6050], + [ 2.4891, -2.6686], + [-0.6892, 0.5099], + [-6.1664, 6.4008], + [-0.6367, 0.6319], + [-6.1641, 6.4496], + [-6.2305, 6.3669], + [-5.5652, 5.9730], + [-4.9388, 5.3682], + [-6.2608, 6.5375], + [-6.0764, 6.1699], + [-2.1975, 2.3451], + [-5.7778, 5.8621], + [ 5.9198, -6.2351], + [-5.6969, 5.7292], + [-4.0736, 4.1897], + [-4.1724, 4.2708], + [ 2.5095, -2.6482], + [ 3.5013, -3.6229], + [-5.9874, 6.2123], + [-6.5975, 6.8081], + [-5.9358, 6.2494], + [-5.7537, 6.0057], + [-4.1943, 4.1526], + [-0.4440, 0.5145], + [ 3.9450, -4.1498], + [-6.1943, 6.3727], + [-6.0787, 6.2172], + [-5.3355, 5.3792], + [ 3.2158, -3.4297], + [ 3.1418, -3.3290], + [-1.2592, 1.2408], + [-4.6693, 4.7141], + [-0.9198, 0.8662], + [-6.1593, 6.4890], + [-6.2242, 6.4972], + [-4.6816, 5.0682], + [-1.1097, 1.1204], + [-6.3099, 6.4518], + [-6.2976, 6.5160], + [ 4.0527, -4.1960], + [-5.7670, 5.9121], + [ 1.4299, -1.5973], + [-2.0126, 2.2295], + [-6.3349, 6.5858], + [-6.7700, 6.8816], + [-1.0966, 1.0764], + [ 4.6613, -5.0041], + [-6.5470, 6.8150], + [ 1.6087, -1.7036], + [ 0.2032, -0.2899], + [-6.7943, 7.0102], + [ 5.4607, -5.6870], + [ 0.1019, -0.4108], + [ 4.1561, -4.2451], + [ 5.1982, -5.4649], + [ 3.6612, -3.8420], + [ 0.7855, -0.8010], + [ 2.7237, -2.9719], + [-6.6447, 6.9373], + [-2.5937, 2.4917], + [-5.7206, 6.0727], + [-4.8831, 5.1117], + [-3.2529, 2.9742], + [-6.4476, 6.6661], + [-6.6567, 6.9744], + [-3.4438, 3.4262], + [-4.0124, 3.9883], + [-6.2243, 6.4904], + [-6.5021, 6.6105], + [-6.5784, 6.7791], + [-6.8498, 6.9059], + [-3.4766, 3.6260], + [-4.7678, 5.0389], + [-6.2895, 6.4401], + [-6.6274, 6.8378], + [ 4.2890, -4.5101], + [ 4.4038, -4.6102], + [-5.7996, 6.0262], + [-6.5503, 6.7762], + [-4.9432, 5.2328], + [-6.7378, 7.0112], + [-4.3075, 4.4443], + [-5.8029, 6.0090], + [ 4.1137, -4.4149], + [-5.8890, 6.2682], + [-1.5013, 1.8081], + [-4.1895, 4.3838], + [ 5.7329, -5.9438], + [-3.1857, 3.3760], + [-5.4126, 5.6843], + [-0.0134, -0.0437], + [ 1.0434, -1.3863], + [-5.9258, 6.3443], + [ 4.3674, -4.5203], + [-5.8349, 6.0247], + [-5.5278, 5.8386], + [ 0.5459, -0.8461], + [-6.0432, 6.1691], + [-6.6702, 6.9549], + [-4.1851, 4.6449], + [-2.3056, 2.4563], + [ 2.2793, -2.4451], + [ 4.6173, -4.8514], + [-3.0470, 3.1281], + [-6.3059, 6.5226], + [-4.9301, 5.0845], + [-6.4945, 6.6230], + [ 1.9065, -2.3571], + [ 2.9473, -3.0807], + [-5.0526, 5.1566], + [ 4.6704, -4.9727], + [-3.8386, 3.8195], + [-6.4962, 6.7260], + [-5.2815, 5.6259], + [-6.6895, 6.8451], + [-0.3285, 0.5150], + [-6.7176, 6.8740], + [-4.3125, 4.2649], + [-4.8969, 5.0402], + [-3.5333, 3.7244], + [-0.8712, 0.7512], + [-5.8589, 6.0084], + [-5.6672, 5.9648], + [-5.9524, 6.2722], + [-5.6855, 5.8820], + [-5.9863, 6.2362], + [-5.6005, 5.8300], + [-4.0901, 4.2718], + [-5.9252, 6.1739], + [-6.6742, 6.9319], + [-5.5701, 5.8744], + [-5.2621, 5.5470], + [-5.9100, 6.2203], + [ 0.9847, -1.1969], + [-6.8219, 7.0621], + [-4.1528, 4.4787], + [ 6.5392, -6.7196], + [ 2.8160, -3.1411], + [-6.6524, 6.9714], + [ 2.2140, -2.2975], + [-4.5694, 4.7535]], device='cuda:0') +06/01/2024 11:51:00 - INFO - __main__ - tensor([[[ 12.6665, 10.2149], + [ 10.2149, 12.5281]], + + [[ 13.8176, 2.0258], + [ 2.0258, 13.6575]], + + [[ 27.3605, -17.4388], + [-17.4388, 28.8920]], + + ..., + + [[ 12.9753, 9.9862], + [ 9.9862, 12.7036]], + + [[ 82.1905, -78.8198], + [-78.8199, 91.5645]], + + [[ 32.7090, -20.0892], + [-20.0892, 34.2897]]], device='cuda:0') +06/01/2024 11:51:00 - INFO - __main__ - ***** Completed training ***** +06/01/2024 11:51:05 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:51:05 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:51:05 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:51:06 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/adapter_config.json +06/01/2024 11:51:06 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 11:51:06 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_adapter.bin +06/01/2024 11:51:06 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/head_config.json +06/01/2024 11:51:06 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 11:51:06 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_model_head.bin +06/01/2024 11:51:06 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 11:51:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 11:51:06 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 11:51:06 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 11:51:06 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 11:51:06 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 11:51:15 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [101, 1000, 5262, 1010, 4238, 2323, 2022, 2404, 2006, 5060, 2008, 4073, 2000, 3046, 2000, 12661, 5712, 1999, 2037, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102, 1000, 4238, 2323, 2022, 2006, 5060, 2008, 4740, 2000, 12661, 5712, 1999, 4238, 1005, 1055, 3746, 2097, 2022, 24663, 2404, 2091, 1010, 1000, 2002, 2056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:51:15 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [101, 3962, 2751, 2001, 9339, 2012, 1002, 4029, 2581, 1012, 3938, 1013, 4029, 2620, 1012, 3438, 2019, 19471, 2012, 6694, 13938, 2102, 1010, 2383, 9847, 2039, 2000, 1002, 4029, 2683, 1012, 2753, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2184, 1012, 102, 3962, 2751, 2001, 9339, 2012, 1002, 3486, 2620, 1012, 3515, 1013, 3486, 2683, 1012, 2321, 2019, 19471, 2012, 28714, 2692, 13938, 2102, 1010, 2383, 14051, 2004, 2152, 2004, 1002, 3486, 2683, 1012, 2423, 1011, 1011, 1037, 2504, 2025, 2464, 2144, 2337, 2423, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 11:51:15 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [101, 12411, 2015, 1012, 2198, 11260, 1998, 3960, 5846, 6430, 29492, 2000, 3713, 1012, 102, 1996, 2053, 1011, 3065, 2020, 12411, 2015, 1012, 2198, 11260, 1997, 4404, 1998, 3960, 5846, 1997, 3516, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 11:53:39 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 11:53:39 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 11:53:39 - INFO - __main__ - tensor([[-7.0479, 7.2739], + [ 5.9527, -6.2088], + [ 4.8475, -4.9827], + [-2.1655, 2.3737], + [ 5.3760, -5.7444], + [-4.9074, 4.9241], + [-6.9649, 7.0766], + [-4.9844, 5.3201], + [-6.4598, 6.7225], + [-6.7842, 7.0131], + [-7.1248, 7.2729], + [ 7.3769, -7.5309], + [ 2.2980, -2.4590], + [-4.6285, 4.7970], + [-5.5991, 5.7278], + [-3.4950, 3.2527], + [-6.2605, 6.4698], + [-1.1696, 1.2384], + [-5.7263, 5.9534], + [ 4.3769, -4.6171], + [ 4.8916, -5.0121], + [-4.9956, 5.2628], + [ 3.5027, -3.6689], + [-4.9511, 5.1927], + [ 3.1553, -3.3465], + [ 3.9966, -4.2991], + [-3.7771, 3.7978], + [-7.0051, 7.1724], + [ 2.4181, -2.6272], + [-5.5738, 5.7669], + [ 2.7079, -2.7558], + [-6.6014, 6.9475], + [-3.8406, 3.6823], + [-6.0333, 6.2105], + [-6.4445, 6.6678], + [-4.0510, 4.2361], + [ 0.1511, -0.2769], + [ 6.3622, -6.5250], + [-0.3361, 0.2649], + [-6.9695, 7.1573], + [ 5.7915, -6.2255], + [-6.9474, 7.2073], + [ 5.9214, -6.1155], + [ 3.3541, -3.5021], + [-0.8591, 0.6860], + [-6.6242, 6.9276], + [-5.6302, 5.7984], + [ 6.9215, -7.0950], + [-5.7464, 6.0020], + [-5.1099, 5.3322], + [-5.9835, 6.2200], + [-5.3521, 5.4986], + [-5.7697, 5.7463], + [-6.9234, 7.2721], + [-6.1103, 6.1699], + [-3.9686, 4.1604], + [-4.2880, 4.8721], + [-7.0166, 7.0652], + [-6.7967, 6.9917], + [-5.6526, 5.7360], + [-2.1392, 1.8998], + [-5.0340, 4.9153], + [-6.2070, 6.3849], + [-1.1192, 0.9010], + [-5.4446, 5.7447], + [ 1.8286, -1.7825], + [-6.8116, 7.0020], + [-6.1833, 6.3835], + [ 3.1291, -3.5170], + [-7.1176, 7.3746], + [-5.7338, 5.9566], + [-1.2297, 1.7399], + [-6.1714, 6.3608], + [-5.3737, 5.5659], + [-4.5804, 4.6793], + [-6.9851, 7.1126], + [-1.6759, 1.4357], + [-6.8815, 6.9148], + [-6.5269, 6.8078], + [-6.8617, 7.0872], + [-0.7727, 1.0006], + [-7.0314, 7.1486], + [-7.1830, 7.4611], + [ 5.9936, -6.3019], + [-4.8366, 4.8929], + [-6.8197, 7.1722], + [-3.4704, 3.5607], + [ 0.0914, -0.1718], + [-6.6156, 6.7603], + [-6.6696, 6.9858], + [-4.6856, 4.8178], + [-6.2004, 6.2309], + [-3.5361, 3.8572], + [-4.6645, 4.7348], + [-6.9963, 7.2083], + [-7.2891, 7.4951], + [ 1.2676, -1.5539], + [-2.1612, 2.0623], + [-5.8296, 5.9008], + [-6.0512, 6.1191], + [-5.1845, 5.5083], + [ 2.3449, -2.6217], + [-6.2321, 6.5146], + [-7.1900, 7.3543], + [ 1.9513, -2.0635], + [-4.2552, 4.3379], + [-1.0962, 0.9426], + [ 6.5213, -6.8272], + [ 1.7964, -2.0031], + [-3.6678, 3.9987], + [-6.0108, 5.9843], + [-6.8985, 7.0483], + [-5.4607, 5.6952], + [-5.9463, 6.2537], + [-4.7714, 4.8641], + [ 2.9284, -3.2004], + [-6.7720, 6.9756], + [-6.1006, 6.2859], + [-6.9303, 7.0296], + [-6.8825, 7.1269], + [-5.7595, 5.7921], + [-0.0802, 0.3310], + [ 3.8475, -4.2806], + [-6.1184, 6.3845], + [-7.1538, 7.3006], + [-4.5027, 4.5694], + [-6.5407, 6.9202], + [ 4.8215, -5.2456], + [-6.9382, 7.1695], + [-6.7166, 6.8895], + [-2.4901, 2.5320], + [ 5.4038, -5.6534], + [-3.2246, 2.8957], + [ 2.9033, -3.0180], + [-7.2842, 7.4237], + [-6.5195, 6.6688], + [-3.1587, 3.3603], + [ 1.2559, -1.1688], + [-6.1078, 6.2857], + [-7.0548, 7.3501], + [-5.6998, 5.8933], + [-1.4064, 1.2548], + [ 6.4789, -6.6624], + [-3.8431, 3.9840], + [-3.4025, 3.2274], + [-1.1484, 1.0693], + [-6.6814, 6.9078], + [ 0.3769, -0.6188], + [-0.6999, 0.6076], + [-5.4013, 5.5171], + [ 0.4227, -0.5822], + [-4.1331, 4.2121], + [-0.3519, 0.3741], + [-6.9277, 7.1100], + [ 1.6467, -1.8976], + [-6.4987, 6.8259], + [-7.3267, 7.5382], + [-3.0231, 3.0037], + [-2.0210, 2.0518], + [-5.5074, 5.8094], + [-4.4433, 4.7076], + [-6.7511, 6.9320], + [-6.9867, 7.1907], + [-6.9220, 7.0238], + [-5.7729, 5.9914], + [-5.8531, 5.9977], + [-4.6219, 4.6369], + [-0.5464, 0.4741], + [-3.8738, 4.0563], + [ 3.9184, -4.2839], + [-0.0953, 0.2662], + [ 0.7882, -0.7824], + [-3.6629, 3.7789], + [-4.8026, 5.1753], + [-3.5560, 3.6235], + [-6.2055, 6.3797], + [-6.5431, 6.7961], + [ 6.6850, -6.9363], + [-6.0236, 6.4422], + [-6.8496, 7.0836], + [-3.2807, 3.1077], + [-4.6064, 4.4641], + [-6.0141, 6.3659], + [-6.7275, 6.8811], + [-0.8954, 1.0701], + [-6.4197, 6.5287], + [-0.0681, 0.3046], + [ 2.4865, -2.8943], + [ 2.1900, -2.5323], + [-6.4001, 6.6457], + [-2.5314, 2.9605], + [ 4.4074, -4.5814], + [ 1.2027, -1.5295], + [-6.3266, 6.7848], + [-5.5369, 5.6278], + [-5.1451, 5.5746], + [-6.2665, 6.5334], + [-2.4032, 2.5206], + [-6.0438, 6.2260], + [-6.4107, 6.7422], + [-5.8806, 5.9629], + [-6.1615, 6.4488], + [-2.3387, 2.6225], + [-4.8789, 5.0906], + [-6.5063, 6.5818], + [ 4.1489, -4.3464], + [-5.3329, 5.5316], + [-3.6502, 3.9791], + [ 2.3587, -2.6415], + [-4.5076, 5.0358], + [ 0.2718, -0.2989], + [-2.6947, 2.7981], + [-3.4431, 3.6031], + [ 0.6177, -0.7716], + [-6.5220, 6.7790], + [-6.6726, 6.8716], + [-6.4703, 6.7467], + [-6.7368, 6.8866], + [ 3.9973, -4.3257], + [-3.2708, 3.1986], + [-4.9660, 5.3106], + [ 4.3019, -4.3905], + [-6.3621, 6.6492], + [ 5.6188, -5.8684], + [-4.4862, 4.5530], + [-6.3000, 6.5775], + [-6.9778, 7.2528], + [-0.3996, 0.8648], + [-6.1134, 6.3561], + [-4.7232, 4.9908], + [-7.0054, 7.2719], + [-6.8577, 6.9899], + [-0.4848, 0.3852], + [ 6.1075, -6.4513], + [ 3.1033, -3.3054], + [-4.5145, 4.5508], + [-6.8793, 7.1065], + [ 4.0710, -4.3489], + [-1.3873, 1.5148], + [ 2.8950, -3.1245], + [-6.4100, 6.5627], + [ 4.0257, -4.1749], + [-4.8686, 4.9921], + [-5.1698, 5.4380], + [-6.8326, 7.0259], + [-6.9010, 7.1356], + [-4.8410, 4.9959], + [-2.4775, 2.5805], + [-4.3907, 4.7488], + [-2.3835, 2.5579], + [-3.8696, 4.0964], + [-0.8931, 0.8658], + [-3.6325, 3.5153], + [-0.0326, -0.0532], + [ 1.5458, -1.7478], + [ 6.1332, -6.3512], + [-6.1399, 6.3563], + [-6.3185, 6.4332], + [-1.1727, 1.4283], + [ 2.5657, -2.7030], + [-4.0069, 4.1320], + [-2.2007, 2.0997], + [-4.3895, 4.4496], + [-3.1863, 3.4127], + [-3.9467, 4.2143], + [-2.2509, 2.2705], + [-1.5716, 1.8438], + [ 2.6071, -2.7950], + [-0.4562, 0.2455], + [-6.3789, 6.5976], + [ 0.9125, -1.0251], + [-6.4165, 6.6867], + [-6.5139, 6.6377], + [-5.7064, 6.1097], + [-5.1517, 5.5781], + [-6.5623, 6.8125], + [-6.2352, 6.3101], + [-1.5600, 1.6789], + [-5.9964, 6.0546], + [ 6.3364, -6.6561], + [-5.9071, 5.9183], + [-3.9948, 4.0785], + [-3.8437, 3.9016], + [ 3.1654, -3.3585], + [ 4.5512, -4.6982], + [-6.1876, 6.4044], + [-6.9071, 7.1028], + [-6.0790, 6.3676], + [-5.8970, 6.1329], + [-3.7526, 3.6670], + [ 0.0686, -0.0242], + [ 4.2259, -4.4386], + [-6.4462, 6.6075], + [-6.2886, 6.4126], + [-5.3650, 5.3673], + [ 4.3063, -4.5217], + [ 3.7953, -4.0110], + [-1.8612, 1.8405], + [-4.5301, 4.5425], + [ 1.3921, -1.5668], + [-6.4949, 6.8144], + [-6.5915, 6.8515], + [-4.8509, 5.2166], + [-0.3147, 0.2622], + [-6.6217, 6.7469], + [-6.4193, 6.6131], + [ 4.6901, -4.8632], + [-5.9959, 6.1281], + [ 2.0039, -2.2034], + [-1.6738, 1.8384], + [-6.3479, 6.5819], + [-7.0082, 7.1130], + [-0.0811, 0.0333], + [ 5.4114, -5.7437], + [-6.9454, 7.1976], + [ 1.5299, -1.6333], + [ 1.6347, -1.7858], + [-7.1446, 7.3423], + [ 5.9604, -6.1857], + [ 1.0251, -1.3832], + [ 4.6207, -4.7226], + [ 5.7206, -5.9800], + [ 4.3176, -4.5196], + [ 1.8781, -1.9576], + [ 3.9368, -4.1669], + [-7.0019, 7.2777], + [-1.9957, 1.8292], + [-5.9461, 6.2945], + [-4.7424, 4.9563], + [-2.2419, 1.8781], + [-6.8054, 7.0123], + [-6.9938, 7.3025], + [-2.7242, 2.6442], + [-3.9775, 3.9179], + [-6.3853, 6.6306], + [-6.7535, 6.8394], + [-6.9160, 7.1037], + [-7.1520, 7.1716], + [-2.4011, 2.4702], + [-4.6557, 4.9052], + [-6.5597, 6.7003], + [-6.9086, 7.1003], + [ 4.8093, -5.0427], + [ 5.1495, -5.3699], + [-5.8778, 6.0974], + [-6.8632, 7.0666], + [-4.5729, 4.8488], + [-7.1512, 7.4068], + [-4.3161, 4.4072], + [-6.0667, 6.2398], + [ 4.9413, -5.2402], + [-5.9040, 6.2716], + [-0.9428, 1.2060], + [-3.9754, 4.1236], + [ 6.4565, -6.6634], + [-2.6956, 2.8434], + [-5.6462, 5.9014], + [ 1.4036, -1.5144], + [ 1.5763, -1.9307], + [-6.2625, 6.6701], + [ 4.8749, -5.0401], + [-5.7896, 5.9670], + [-5.6984, 5.9985], + [ 1.6775, -2.0514], + [-6.2373, 6.3532], + [-7.0470, 7.3160], + [-3.7738, 4.2362], + [-1.7288, 1.8076], + [ 2.8741, -3.0739], + [ 5.1285, -5.3676], + [-2.8818, 2.9187], + [-6.6664, 6.8625], + [-4.7116, 4.8239], + [-6.6704, 6.7838], + [ 2.1682, -2.6249], + [ 3.8699, -4.0447], + [-4.9328, 4.9998], + [ 5.3057, -5.6171], + [-3.5225, 3.4576], + [-6.8482, 7.0584], + [-4.6868, 4.9854], + [-7.0377, 7.1862], + [ 0.3145, -0.2062], + [-7.0679, 7.2156], + [-3.9032, 3.7724], + [-4.6116, 4.7351], + [-3.3955, 3.5727], + [ 0.0969, -0.2697], + [-6.2297, 6.3615], + [-5.4392, 5.7160], + [-6.2038, 6.5166], + [-5.9781, 6.1653], + [-6.0185, 6.2591], + [-5.9039, 6.1344], + [-3.9018, 4.0468], + [-6.1808, 6.4184], + [-7.0839, 7.3228], + [-5.7197, 5.9879], + [-4.9020, 5.1351], + [-6.2465, 6.5423], + [ 1.1193, -1.3558], + [-7.1473, 7.3724], + [-4.0857, 4.3903], + [ 6.9042, -7.0852], + [ 3.4790, -3.8200], + [-7.0390, 7.3399], + [ 3.4802, -3.6360], + [-3.5673, 3.7094]], device='cuda:0') +06/01/2024 11:53:39 - INFO - __main__ - tensor([[[ 19.7339, 15.7823], + [ 15.7823, 19.5145]], + + [[ 21.5968, 4.7433], + [ 4.7433, 21.2661]], + + [[ 37.8045, -19.1287], + [ -19.1288, 39.2573]], + + ..., + + [[ 20.2446, 15.6431], + [ 15.6431, 19.8371]], + + [[ 114.9205, -104.4698], + [-104.4697, 124.2273]], + + [[ 96.6547, -85.1112], + [ -85.1112, 103.8016]]], device='cuda:0') +06/01/2024 11:53:39 - INFO - __main__ - ***** Completed training ***** diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..38f218e6248986ab9fd9946ec8fb3e4dd10750d8 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5c340ee078dd34e63f2ece7311b2000c2656dcb82475fc69852cb61dd8480c9 +size 69 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..d9cc8e69a2c727cccf913249d6cefba606f290af --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238e336de54b22ba2e51b422b5c3e6ffb986047ab9fc3564ae64ba8e22a6fb9b +size 68667 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..da06b58bda22aafa7c2cb5c88ee0afda749a94e5 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecfe87259e2e429200d03c344842629eafc4a94ca868a77feed719e295991414 +size 6117 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..88490b57d39e9ed5184809896b57171cbcad6c2c --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63b62c0874c2f4f8e1b95ad25aaeafd910795ec519b6990474c1a96f9480adc +size 68 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..65c96da4e5bbd31b9e94f32c406e19db2ccf066d --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680d6a81a26decd83b1066d9d1b8b5660e50edc6e5ef8390177dd1b9cd399005 +size 69192 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..c318bb886ed13fba99edd46d907034e384fc3185 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44b1d5774d7a0140f9490f19c6522dc7ae4ae52a4936f17a97e56260564f5b4 +size 6126 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..90c7e6b09b6a2c0480c6d02c875c3a23540445ac --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee37011a76c331e5d81afafae436bf65f8ef27864968ebad7db073046f7196f5 +size 68 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..714e39a5a18eb39a3f803592c7c8ba135bdf28a7 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65b5d6b860784277d3407fcb662151ef7401237646d22ddd00cde3aa4a5b1e3 +size 69224 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..95a2405be301b2fc7328e6d734ce989e55be2ea8 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2900938a34c4900220e231392afad9c2c87ea6a93e129ca8747be441d3e321b +size 6132 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..b77f9ecf16edec2f8320b30d4fcae1e69537df9b --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05642000bfd35d8399125bc08678dfb01c74e8ae4e34be30bb48c7bc86639405 +size 68 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..20051e9647fdec58fc3e49bb40ba3370434bba2b --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc986dd29f091adee1a46af3ecd42016a0b00e9e202511f3c88fe036ab119fda +size 69368 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..178f49c481da3647f4293950d4689a0018b315ba --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ee178a4f8ca7625d30afaefe2284db9288722727900095f5bc46aa86e8fe72 +size 6141 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..62016996ef6694b2536bc3746a92a61adc91ea42 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22be691ad6d600e15acf1701ca9d926b42e740f74dab3bb43f6e070056dabd6 +size 68 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..eff177441b3d72d4ecf04bbb9defc0aa8e60a97f --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a851b20df67042973f2242e4af2e08ef5154fa8ec35279ec24902468bbb4697f +size 69456 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..9025018dd25f9bb0bbb619f3bd664f24f26cca99 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68137da4aa4cba3d12a8a7050809107fc8788963f0121e4bfdecba98f8d9ecd1 +size 6168 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..f92354758eaa58940ea9c748a1d1ebd43d367c67 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f78f933b7b7c9c44ce7d7a5a3e85a7fff35981f2a0bcd28bdbcca11b64f09a6 +size 68 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..416bb2a564ed45d74c9026e82a42bc3f896c2a44 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb219c2690e703c318aef585ea697a44bc420a0bab5de8820cd841d0afd996e8 +size 69275 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9cb590d6bbc0da3500e9ff34b17f7babe47df3 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7767e31bf735f013d27120195c2ab3738c953460ff0f6ebad7f3764abfe0b79b +size 6162 diff --git a/outputs/mrpc/bert-base-uncased_adapterstrain_val_5e-05_42_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/mrpc/bert-base-uncased_adapterstrain_val_5e-05_42_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..fda3de4740126ef13bed1c41d20a1606732589e2 --- /dev/null +++ b/outputs/mrpc/bert-base-uncased_adapterstrain_val_5e-05_42_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,10 @@ +06/01/2024 11:24:10 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 11:24:10 - INFO - __main__ - ***** Starting script ***** +06/01/2024 11:24:16 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 11:24:17 - INFO - adapters.utils - Attempting to load adapter from source 'hf'... +06/01/2024 11:24:17 - INFO - adapters.utils - Repo id must be in the form 'repo_name' or 'namespace/repo_name': './outputs/mrpc/bert-base-uncased_adapterstrain_val_5e-05_42_8_10000/step_0'. Use `repo_type` argument if needed. +06/01/2024 11:24:17 - INFO - adapters.utils - Attempting to load adapter from source 'ah'... +06/01/2024 11:24:18 - INFO - adapters.utils - https://raw.githubusercontent.com/Adapter-Hub/Hub/master/dist/v2/index/bert-base-uncased.json not found in cache or force_download set to True, downloading to /root/.cache/torch/adapters/tmp7fkjbr_f +06/01/2024 11:24:18 - INFO - adapters.utils - storing https://raw.githubusercontent.com/Adapter-Hub/Hub/master/dist/v2/index/bert-base-uncased.json in cache at /root/.cache/torch/adapters/22e416a3791c0b8e1aafcfac89db490ae05250204ed58e5d81a8645b0726dda0.b141886e1b58ad87e04b024247bf438580086cd7bd78529838848a12d9323e20 +06/01/2024 11:24:18 - INFO - adapters.utils - creating metadata file for /root/.cache/torch/adapters/22e416a3791c0b8e1aafcfac89db490ae05250204ed58e5d81a8645b0726dda0.b141886e1b58ad87e04b024247bf438580086cd7bd78529838848a12d9323e20 +06/01/2024 11:24:18 - INFO - adapters.utils - No adapter with name './outputs/mrpc/bert-base-uncased_adapterstrain_val_5e-05_42_8_10000/step_0' was found in the adapter index. diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..beac832e1ca36b18e4de998edb37f036fea0458f --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,3258 @@ +06/01/2024 12:49:38 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:49:38 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:49:39 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:49:40 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 12:49:40 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 12:49:40 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 12:49:40 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 12:49:40 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:49:40 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 12:49:40 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:49:40 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:49:40 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 12:49:40 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 12:49:40 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 12:49:40 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 12:49:48 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [0, 113, 7908, 2156, 1603, 197, 28, 342, 15, 3120, 14, 1170, 7, 860, 7, 21280, 3345, 11, 49, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2, 2, 113, 1603, 197, 28, 15, 3120, 14, 3881, 7, 21280, 3345, 11, 1603, 128, 29, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:49:48 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [0, 32110, 1637, 21, 5304, 23, 68, 41682, 4, 3248, 1589, 42100, 4, 2466, 41, 10671, 23, 10775, 5050, 2156, 519, 15199, 62, 7, 68, 41821, 4, 1096, 480, 10, 672, 45, 450, 187, 902, 158, 479, 2, 2, 32110, 1637, 21, 5304, 23, 68, 40156, 4, 3506, 1589, 40598, 4, 996, 41, 10671, 23, 321, 1497, 5050, 2156, 519, 385, 26587, 25, 239, 25, 68, 40598, 4, 1244, 480, 10, 672, 45, 450, 187, 902, 564, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:49:48 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [0, 104, 1290, 4, 610, 9153, 8, 3045, 4572, 1981, 30790, 7, 1994, 479, 2, 2, 133, 117, 12, 35624, 58, 12274, 4, 610, 9153, 9, 5517, 8, 3045, 4572, 9, 1261, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:52:08 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 12:52:08 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 12:52:08 - INFO - __main__ - tensor([[-0.0987, 0.1301], + [-0.1007, 0.1022], + [-0.0830, 0.1416], + [-0.0790, 0.1179], + [-0.0785, 0.1183], + [-0.0775, 0.0579], + [-0.0706, 0.1290], + [-0.0552, 0.1185], + [-0.0603, 0.1169], + [-0.0935, 0.1220], + [-0.0686, 0.1324], + [-0.1019, 0.0387], + [-0.0540, 0.0619], + [-0.0614, 0.1016], + [-0.0906, 0.1230], + [-0.0792, 0.1077], + [-0.0721, 0.1171], + [-0.0519, 0.0951], + [-0.0744, 0.1411], + [-0.0714, 0.1078], + [-0.0925, 0.0974], + [-0.0650, 0.1133], + [-0.1290, 0.0914], + [-0.0937, 0.1547], + [-0.1047, 0.1181], + [-0.0859, 0.1082], + [-0.1272, 0.0835], + [-0.0756, 0.1208], + [-0.0886, 0.0919], + [-0.0672, 0.0916], + [-0.0912, 0.1186], + [-0.0861, 0.1406], + [-0.0827, 0.1027], + [-0.0757, 0.1340], + [-0.0882, 0.1058], + [-0.0918, 0.1166], + [-0.1018, 0.1073], + [-0.0739, 0.1335], + [-0.0642, 0.0090], + [-0.0696, 0.1192], + [-0.0957, 0.1147], + [-0.0779, 0.1286], + [-0.1009, 0.1000], + [-0.0931, 0.1061], + [-0.0421, 0.0934], + [-0.0756, 0.1287], + [-0.0747, 0.1280], + [-0.1075, 0.0180], + [-0.0690, 0.1302], + [-0.0940, 0.1108], + [-0.1021, 0.0893], + [-0.1197, -0.0174], + [-0.0709, 0.1190], + [-0.1063, 0.0932], + [-0.0821, 0.1405], + [-0.1247, 0.1062], + [-0.1019, 0.1110], + [-0.0898, 0.1353], + [-0.1084, 0.0553], + [-0.0741, 0.1114], + [-0.0703, 0.1231], + [-0.0858, 0.1434], + [-0.0819, 0.1408], + [-0.0811, 0.1391], + [-0.1198, 0.1101], + [-0.0453, 0.0734], + [-0.0999, 0.1098], + [-0.1093, 0.1461], + [-0.0687, 0.1401], + [-0.0731, 0.1200], + [-0.0967, 0.0987], + [-0.0694, 0.1097], + [-0.0706, 0.1299], + [-0.0785, 0.1054], + [-0.0920, 0.1259], + [-0.0794, 0.1080], + [-0.1093, 0.1005], + [-0.0913, 0.1426], + [-0.0971, 0.0406], + [-0.1085, 0.0525], + [-0.0960, 0.1032], + [-0.0771, 0.1408], + [-0.1240, 0.0343], + [-0.0642, 0.1286], + [-0.0808, 0.0909], + [-0.1205, 0.0772], + [-0.1162, 0.0879], + [-0.0687, 0.1028], + [-0.0606, 0.1302], + [-0.0782, 0.1045], + [-0.0912, 0.1131], + [-0.0867, 0.1256], + [-0.0715, 0.1361], + [-0.0810, 0.1501], + [-0.0829, 0.1204], + [-0.0847, 0.1295], + [-0.0817, 0.0992], + [-0.1305, 0.0853], + [-0.0815, 0.1317], + [-0.2119, 0.0045], + [-0.0737, 0.1012], + [-0.0821, 0.1337], + [-0.0902, 0.1173], + [-0.0765, 0.1472], + [-0.0902, 0.1231], + [-0.1345, 0.0912], + [-0.1200, 0.0978], + [-0.1868, 0.0398], + [-0.0839, 0.1340], + [-0.0806, 0.1096], + [-0.0899, 0.1174], + [-0.0848, 0.1541], + [-0.0710, 0.1177], + [-0.0732, 0.1246], + [-0.0641, 0.0935], + [-0.0901, 0.1093], + [-0.0729, 0.1194], + [-0.0803, 0.1301], + [-0.0577, 0.1241], + [-0.1015, 0.1056], + [-0.0799, 0.1491], + [-0.1267, 0.0464], + [-0.0880, 0.0791], + [-0.0646, 0.1170], + [-0.0919, 0.0971], + [-0.0994, 0.1124], + [-0.1214, 0.0772], + [-0.0855, 0.1220], + [-0.0934, 0.1071], + [-0.1367, 0.0041], + [-0.0837, 0.1372], + [-0.1001, 0.1083], + [-0.0948, 0.0275], + [-0.1947, -0.0498], + [-0.1106, 0.0883], + [-0.0518, 0.1188], + [-0.1299, 0.0713], + [-0.0463, 0.0560], + [-0.0669, 0.1250], + [-0.0879, 0.1264], + [-0.0855, 0.1357], + [-0.0809, 0.1101], + [-0.1076, 0.1021], + [-0.0648, 0.1085], + [-0.1145, 0.0644], + [-0.0505, 0.1226], + [-0.1099, 0.1381], + [-0.1334, 0.1113], + [-0.1007, 0.1062], + [-0.0509, 0.1346], + [-0.0812, 0.1410], + [-0.0932, 0.0912], + [-0.0934, 0.1513], + [-0.0750, 0.1432], + [-0.0784, 0.1054], + [-0.0663, 0.1075], + [-0.1724, 0.0345], + [-0.0964, 0.1051], + [-0.1060, 0.1147], + [-0.0974, 0.1020], + [-0.1183, 0.1241], + [-0.1130, 0.0970], + [-0.0732, 0.1408], + [-0.1170, 0.0969], + [-0.0933, 0.1349], + [-0.0768, 0.1109], + [-0.1971, 0.0543], + [-0.1009, 0.0809], + [-0.0726, 0.1499], + [-0.0753, 0.1206], + [-0.0846, 0.0972], + [-0.0984, 0.1040], + [-0.0679, 0.1592], + [-0.1520, 0.0508], + [-0.0661, 0.0881], + [-0.1192, 0.1017], + [-0.0785, 0.1270], + [-0.0734, -0.0776], + [-0.0743, 0.1226], + [-0.1035, 0.1189], + [-0.0792, 0.1230], + [-0.0853, 0.1184], + [-0.0875, 0.1410], + [-0.0711, 0.1139], + [-0.0803, 0.1175], + [-0.0791, 0.1475], + [-0.0682, 0.0981], + [-0.1035, 0.1013], + [-0.0819, 0.1052], + [-0.0889, 0.1010], + [-0.0715, 0.1177], + [-0.0746, 0.1307], + [-0.1210, 0.1461], + [-0.0794, 0.1267], + [-0.1022, 0.0869], + [-0.0949, 0.1081], + [-0.0997, 0.1455], + [-0.0946, 0.0985], + [-0.0824, 0.1333], + [-0.0814, 0.1413], + [-0.1438, 0.0391], + [-0.0600, 0.1501], + [-0.0739, 0.1264], + [-0.0998, 0.1194], + [-0.0893, 0.0997], + [-0.0494, 0.1133], + [-0.1249, 0.1076], + [-0.0890, 0.1094], + [-0.0776, 0.1311], + [-0.0973, 0.1325], + [-0.0915, 0.1270], + [-0.0501, 0.1306], + [-0.0491, 0.0533], + [-0.0589, 0.1177], + [-0.0862, 0.1410], + [-0.1175, 0.1308], + [-0.0792, 0.0955], + [-0.0830, 0.1371], + [-0.0889, 0.1200], + [-0.0605, 0.0419], + [-0.0792, 0.1387], + [-0.0713, 0.1122], + [-0.0902, 0.1332], + [-0.0559, 0.0013], + [-0.0901, 0.1183], + [-0.0902, 0.1101], + [-0.0730, 0.1316], + [-0.0813, 0.1124], + [-0.1242, 0.0983], + [-0.0836, 0.1424], + [-0.0871, 0.1334], + [-0.0545, 0.0978], + [-0.0940, 0.1166], + [-0.0867, 0.1405], + [-0.0806, 0.1401], + [-0.0890, 0.1459], + [-0.0596, 0.1143], + [-0.1235, 0.1190], + [-0.0602, 0.1454], + [-0.0897, 0.1202], + [-0.1128, 0.1021], + [-0.0976, 0.0919], + [-0.0843, 0.1379], + [-0.0777, 0.1242], + [-0.0731, 0.1152], + [-0.0838, 0.1009], + [-0.0847, 0.1423], + [-0.1012, 0.1294], + [-0.0860, 0.1035], + [-0.0916, 0.1158], + [-0.0929, 0.1363], + [-0.0812, 0.1313], + [-0.0864, 0.1345], + [-0.0988, 0.1152], + [-0.0935, 0.1625], + [-0.0999, 0.1012], + [-0.0586, 0.1453], + [-0.0696, 0.1469], + [-0.0862, 0.1217], + [-0.0629, 0.1367], + [-0.1468, 0.1323], + [-0.0924, 0.0871], + [-0.1340, 0.0826], + [-0.1174, 0.1133], + [-0.0792, 0.1307], + [-0.0511, 0.1064], + [-0.1009, 0.1165], + [-0.0954, 0.1229], + [-0.1003, 0.1226], + [-0.2311, -0.0024], + [-0.0805, 0.0975], + [-0.0811, 0.1035], + [-0.0928, 0.1303], + [-0.0854, 0.1085], + [-0.0670, 0.1154], + [-0.0281, 0.1120], + [-0.0936, 0.1038], + [-0.1094, 0.1035], + [-0.0795, 0.1309], + [-0.0824, 0.1302], + [-0.0847, 0.0875], + [-0.0866, 0.1320], + [-0.0926, 0.1657], + [-0.0714, 0.1303], + [-0.0806, 0.1178], + [-0.0754, 0.1394], + [-0.0905, 0.1311], + [-0.0799, 0.1016], + [-0.0833, 0.1144], + [-0.0940, 0.1116], + [-0.0963, 0.0942], + [-0.1018, 0.1211], + [-0.0915, 0.1114], + [-0.1356, 0.0406], + [-0.1337, 0.1231], + [-0.0889, 0.1478], + [-0.1202, 0.0737], + [-0.0923, 0.1027], + [-0.0626, 0.1196], + [-0.0863, -0.0065], + [-0.0938, 0.1320], + [-0.0718, 0.1100], + [-0.0953, 0.1308], + [-0.0785, 0.1389], + [-0.0761, 0.1197], + [-0.0916, 0.1479], + [-0.0887, 0.1597], + [-0.0761, 0.1325], + [-0.0956, 0.0501], + [-0.0727, 0.1248], + [-0.0826, 0.1057], + [-0.0609, 0.1170], + [-0.0997, 0.1012], + [-0.0926, 0.0901], + [-0.0814, 0.1331], + [-0.0937, 0.1241], + [-0.0486, 0.1334], + [-0.0803, 0.1502], + [-0.1055, 0.1107], + [-0.0961, 0.1147], + [-0.0888, 0.1121], + [-0.0734, 0.1090], + [-0.0512, 0.0588], + [-0.0929, 0.1091], + [-0.0804, 0.0716], + [-0.0774, 0.1381], + [-0.1258, 0.0838], + [-0.0694, 0.1360], + [-0.1154, 0.1155], + [-0.0816, 0.1317], + [-0.0768, 0.1397], + [-0.0820, 0.1176], + [-0.0868, 0.1127], + [-0.0899, 0.1064], + [-0.1113, 0.0462], + [-0.1045, 0.1309], + [-0.1279, 0.1316], + [-0.0754, 0.1505], + [-0.0679, 0.1253], + [-0.0674, 0.1424], + [-0.1601, 0.0483], + [-0.0865, 0.1281], + [-0.0969, 0.1432], + [-0.1021, 0.1254], + [-0.0728, 0.1379], + [-0.0895, 0.1319], + [-0.1225, 0.0236], + [-0.1041, 0.1159], + [-0.1310, 0.0188], + [-0.0964, 0.1231], + [-0.0649, 0.0954], + [-0.1203, 0.0455], + [-0.0669, 0.1143], + [-0.0799, 0.1312], + [-0.0760, 0.1241], + [-0.0702, 0.0875], + [-0.0897, 0.1009], + [-0.0745, 0.1079], + [-0.0649, 0.1077], + [-0.1501, -0.0009], + [-0.0846, 0.1041], + [-0.1380, 0.0356], + [-0.0931, 0.1221], + [-0.1174, 0.0994], + [-0.0682, 0.1270], + [-0.0602, 0.1290], + [-0.0724, 0.1416], + [-0.0765, 0.1168], + [-0.0873, 0.1016], + [-0.1665, 0.0154], + [-0.0810, 0.1430], + [-0.0775, 0.1067], + [-0.0670, 0.1295], + [-0.0926, 0.1209], + [-0.1039, 0.1162], + [-0.0776, 0.0950], + [-0.1473, 0.0620], + [-0.0894, 0.1071], + [-0.0976, 0.1435], + [-0.0702, 0.0361], + [-0.2728, 0.0473], + [-0.0866, 0.1391], + [-0.0819, 0.1053], + [-0.0762, 0.1419], + [-0.1037, 0.0960], + [-0.0664, 0.1026], + [-0.1152, 0.0970], + [-0.1215, 0.0983], + [-0.0864, 0.1396], + [-0.0657, 0.1205], + [-0.0621, 0.1359], + [-0.1195, -0.0169], + [-0.0934, 0.1180], + [-0.0764, 0.1364], + [-0.0873, 0.1137], + [-0.0725, 0.1070], + [-0.0847, 0.1308], + [-0.0876, 0.1093], + [-0.0669, 0.1087], + [-0.0958, 0.1180], + [-0.0373, 0.0624], + [-0.0795, 0.1098], + [-0.1481, 0.0571], + [-0.0875, 0.1376], + [-0.1180, 0.1118], + [-0.0840, 0.1226], + [-0.0473, 0.1222], + [-0.0867, 0.0905]], device='cuda:0') +06/01/2024 12:52:08 - INFO - __main__ - tensor([[[10.4905, 10.3664], + [10.3664, 10.4802]], + + [[11.2695, 11.1312], + [11.1312, 11.2554]], + + [[10.5914, 10.5108], + [10.5108, 10.5896]], + + ..., + + [[11.3954, 11.2250], + [11.2250, 11.3988]], + + [[12.1330, 11.8623], + [11.8624, 12.1559]], + + [[11.0927, 10.9413], + [10.9413, 11.0836]]], device='cuda:0') +06/01/2024 12:52:08 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:52:13 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:52:13 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:52:15 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:52:15 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/adapter_config.json +06/01/2024 12:52:15 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 12:52:16 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_adapter.bin +06/01/2024 12:52:16 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/head_config.json +06/01/2024 12:52:16 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:52:16 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_model_head.bin +06/01/2024 12:52:16 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:52:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:52:16 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 12:52:16 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 12:52:16 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 12:52:16 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 12:52:22 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [0, 113, 7908, 2156, 1603, 197, 28, 342, 15, 3120, 14, 1170, 7, 860, 7, 21280, 3345, 11, 49, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2, 2, 113, 1603, 197, 28, 15, 3120, 14, 3881, 7, 21280, 3345, 11, 1603, 128, 29, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:52:22 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [0, 32110, 1637, 21, 5304, 23, 68, 41682, 4, 3248, 1589, 42100, 4, 2466, 41, 10671, 23, 10775, 5050, 2156, 519, 15199, 62, 7, 68, 41821, 4, 1096, 480, 10, 672, 45, 450, 187, 902, 158, 479, 2, 2, 32110, 1637, 21, 5304, 23, 68, 40156, 4, 3506, 1589, 40598, 4, 996, 41, 10671, 23, 321, 1497, 5050, 2156, 519, 385, 26587, 25, 239, 25, 68, 40598, 4, 1244, 480, 10, 672, 45, 450, 187, 902, 564, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:52:22 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [0, 104, 1290, 4, 610, 9153, 8, 3045, 4572, 1981, 30790, 7, 1994, 479, 2, 2, 133, 117, 12, 35624, 58, 12274, 4, 610, 9153, 9, 5517, 8, 3045, 4572, 9, 1261, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:54:44 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 12:54:44 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 12:54:44 - INFO - __main__ - tensor([[-4.0405e+00, 3.9696e+00], + [ 1.2069e+00, -1.5950e+00], + [ 3.2933e-01, -2.9285e-01], + [-3.2106e+00, 3.0938e+00], + [ 9.9778e-01, -1.1728e+00], + [-3.8921e+00, 3.7900e+00], + [-1.6699e+00, 1.5486e+00], + [-2.0778e+00, 2.0686e+00], + [-2.6863e+00, 2.5798e+00], + [-1.3748e+00, 1.1471e+00], + [-2.9963e+00, 2.9205e+00], + [ 1.0662e+00, -1.1580e+00], + [ 1.2907e+00, -1.2880e+00], + [-1.8749e+00, 1.7263e+00], + [-3.8119e+00, 3.7902e+00], + [-1.0583e+00, 9.5314e-01], + [-3.9065e+00, 3.7606e+00], + [ 1.2708e+00, -1.2688e+00], + [-2.9364e+00, 2.8524e+00], + [-3.4222e-01, 1.8742e-01], + [ 1.0290e+00, -1.2041e+00], + [-1.6447e+00, 1.4323e+00], + [ 2.3838e-01, -3.8824e-01], + [-3.1108e+00, 3.0261e+00], + [-2.4252e+00, 2.3234e+00], + [-8.0644e-01, 5.5071e-01], + [ 5.5615e-01, -8.3122e-01], + [-4.0571e+00, 4.0040e+00], + [-1.7587e+00, 1.5730e+00], + [-2.7826e+00, 2.6544e+00], + [ 4.8901e-02, -1.1667e-01], + [-3.2086e+00, 3.1303e+00], + [-1.1040e+00, 9.1223e-01], + [-1.7935e+00, 1.6669e+00], + [-3.9729e+00, 3.8718e+00], + [-1.2131e+00, 1.0035e+00], + [ 8.0130e-01, -9.4194e-01], + [ 1.0382e+00, -9.2734e-01], + [-2.0212e+00, 1.9494e+00], + [-4.0608e+00, 4.0145e+00], + [ 6.4867e-01, -7.5836e-01], + [-4.0048e+00, 3.9407e+00], + [ 7.5725e-01, -9.5281e-01], + [ 9.5202e-01, -1.2099e+00], + [ 5.8883e-01, -9.0077e-01], + [-3.8781e+00, 3.7861e+00], + [-3.0983e+00, 2.9697e+00], + [ 1.4667e+00, -1.7044e+00], + [-3.9611e+00, 3.7485e+00], + [-3.4836e+00, 3.2627e+00], + [-3.2653e+00, 2.9908e+00], + [-3.2750e+00, 3.0865e+00], + [-3.7872e+00, 3.6291e+00], + [-4.3452e+00, 4.1981e+00], + [-1.6186e+00, 1.5573e+00], + [-3.4782e+00, 3.4019e+00], + [-9.4068e-01, 7.7119e-01], + [-4.0288e+00, 3.9590e+00], + [-4.1484e+00, 4.0088e+00], + [-2.7115e+00, 2.5121e+00], + [ 7.4038e-02, -3.9790e-01], + [ 1.1511e-01, -5.6500e-02], + [-3.1737e+00, 3.1151e+00], + [-3.6940e+00, 3.5743e+00], + [-2.1192e+00, 1.8872e+00], + [ 6.9836e-01, -7.2115e-01], + [-4.2407e+00, 4.0492e+00], + [-3.5516e+00, 3.4436e+00], + [ 2.3271e-02, 3.1135e-02], + [-4.1127e+00, 3.9091e+00], + [-3.5536e+00, 3.5022e+00], + [-1.7685e+00, 1.6473e+00], + [-2.8194e+00, 2.6359e+00], + [-2.4471e+00, 2.1680e+00], + [-2.4822e+00, 2.3628e+00], + [-3.3849e+00, 3.2742e+00], + [-3.3464e+00, 3.2106e+00], + [-3.4484e+00, 3.2958e+00], + [-3.9828e+00, 3.9123e+00], + [-3.9542e+00, 3.8342e+00], + [-3.2421e+00, 3.0690e+00], + [-2.2404e+00, 2.2461e+00], + [-2.8660e+00, 2.5814e+00], + [ 7.0336e-01, -9.6638e-01], + [-3.1439e+00, 3.0180e+00], + [-7.9976e-01, 5.0303e-01], + [-2.9139e+00, 2.6160e+00], + [ 1.8167e-01, -3.8224e-01], + [-4.0821e+00, 3.8965e+00], + [-4.1230e+00, 3.9889e+00], + [-2.8263e-01, -1.0464e-02], + [-3.7192e+00, 3.6904e+00], + [-3.5073e+00, 3.3821e+00], + [ 5.7906e-01, -5.0972e-01], + [-3.5307e+00, 3.3804e+00], + [-4.1378e+00, 4.0485e+00], + [ 1.2675e+00, -1.6085e+00], + [-3.4701e+00, 3.4775e+00], + [-1.5744e+00, 1.3608e+00], + [-2.5368e+00, 2.2534e+00], + [-3.0280e+00, 2.8010e+00], + [ 1.0757e+00, -1.0063e+00], + [-3.5514e+00, 3.4668e+00], + [-4.1085e+00, 3.9991e+00], + [ 1.1906e+00, -1.4253e+00], + [-3.9592e+00, 3.8774e+00], + [-6.2062e-02, -9.9345e-02], + [ 8.3771e-01, -1.0619e+00], + [ 7.7887e-01, -8.4559e-01], + [ 1.0267e-01, -3.2289e-01], + [-1.2048e+00, 1.1835e+00], + [-1.1669e+00, 1.1495e+00], + [-2.0952e+00, 1.9592e+00], + [-4.1290e+00, 4.0040e+00], + [-2.8087e-01, 3.0770e-02], + [-6.6047e-01, 5.2140e-01], + [-2.8700e+00, 2.6905e+00], + [-2.9930e+00, 2.9243e+00], + [-3.2468e+00, 3.2267e+00], + [-4.0420e+00, 3.9230e+00], + [-2.3117e+00, 2.2515e+00], + [-6.2577e-01, 4.3556e-01], + [ 1.0710e+00, -1.2805e+00], + [-2.4278e+00, 2.2646e+00], + [-4.1759e+00, 4.0351e+00], + [-3.7696e+00, 3.6643e+00], + [-3.8092e+00, 3.6444e+00], + [ 6.9585e-01, -9.5232e-01], + [-3.7930e+00, 3.6849e+00], + [-4.0589e+00, 3.9807e+00], + [-2.3089e+00, 2.2185e+00], + [-8.0711e-01, 5.4555e-01], + [-3.5078e+00, 3.3388e+00], + [-6.3438e-01, 3.9604e-01], + [-1.4552e+00, 1.3765e+00], + [-3.2507e+00, 3.0605e+00], + [ 6.3784e-01, -8.1849e-01], + [ 3.9325e-01, -4.7864e-01], + [-4.1512e+00, 4.1032e+00], + [-2.4885e+00, 2.3018e+00], + [-3.1583e+00, 3.0528e+00], + [-1.1198e-01, -1.8311e-03], + [ 7.2026e-01, -9.5533e-01], + [-3.6080e+00, 3.4502e+00], + [ 1.4203e+00, -1.5468e+00], + [-2.5731e+00, 2.3865e+00], + [-3.5289e+00, 3.3538e+00], + [-1.6134e+00, 1.3845e+00], + [-1.0687e+00, 8.3473e-01], + [-2.8682e+00, 2.8481e+00], + [ 1.0386e+00, -9.3492e-01], + [-1.8534e-01, -7.9294e-03], + [-4.0178e+00, 3.8939e+00], + [-2.7705e+00, 2.6993e+00], + [-2.2511e+00, 2.0889e+00], + [-3.6556e+00, 3.4864e+00], + [-4.0110e+00, 3.7547e+00], + [-8.1271e-01, 6.8273e-01], + [-1.2610e+00, 1.0390e+00], + [-3.5579e+00, 3.4789e+00], + [-2.4350e+00, 2.1692e+00], + [-3.2200e+00, 3.0358e+00], + [-3.3950e+00, 3.3160e+00], + [-4.1205e+00, 3.9600e+00], + [-1.9385e+00, 1.7137e+00], + [-3.8529e+00, 3.7288e+00], + [-3.4272e+00, 3.2248e+00], + [ 7.6049e-01, -1.1273e+00], + [-2.4185e+00, 2.3973e+00], + [ 1.5906e+00, -1.8652e+00], + [-5.3129e-01, 3.9768e-01], + [-6.0673e-01, 3.9859e-01], + [-9.5380e-01, 8.8880e-01], + [-3.3201e+00, 3.0391e+00], + [-8.8861e-02, -1.9162e-01], + [-3.4579e+00, 3.2215e+00], + [-3.8294e+00, 3.6826e+00], + [ 4.7940e-02, -2.1811e-01], + [-2.6853e+00, 2.5309e+00], + [-3.9317e+00, 3.6974e+00], + [ 2.1880e-01, -4.2160e-01], + [-2.4699e+00, 2.2523e+00], + [-1.8520e+00, 1.7710e+00], + [-3.4851e+00, 3.3408e+00], + [-7.7930e-01, 7.8909e-01], + [-8.7925e-01, 9.0835e-01], + [ 8.3117e-01, -1.0209e+00], + [-2.6482e+00, 2.4823e+00], + [ 1.6920e+00, -1.9472e+00], + [-1.6647e+00, 1.3779e+00], + [-3.9736e+00, 3.9402e+00], + [ 1.3024e+00, -1.5368e+00], + [-2.0073e+00, 1.7641e+00], + [-3.1823e+00, 3.0285e+00], + [ 3.0868e-01, -4.2383e-01], + [-2.7572e+00, 2.6573e+00], + [-3.9819e+00, 3.8522e+00], + [ 2.4347e-01, -5.0327e-01], + [-1.5395e+00, 1.2870e+00], + [-2.6130e+00, 2.3995e+00], + [-3.8732e+00, 3.7581e+00], + [-1.0221e+00, 1.0288e+00], + [ 1.6256e-01, -9.0097e-02], + [-2.1464e+00, 2.0593e+00], + [-3.4337e+00, 3.3449e+00], + [ 1.7365e+00, -1.7903e+00], + [-1.4440e+00, 1.2931e+00], + [-3.7553e+00, 3.6300e+00], + [ 6.5502e-02, -5.6605e-02], + [-3.4797e-01, 6.2915e-02], + [ 5.2548e-01, -5.6250e-01], + [-3.5490e+00, 3.4258e+00], + [-2.0991e+00, 1.9292e+00], + [-1.4160e+00, 1.3297e+00], + [-3.3292e+00, 3.2306e+00], + [-3.8959e+00, 3.7533e+00], + [-3.1881e+00, 3.0378e+00], + [-2.9957e+00, 2.8487e+00], + [ 9.6201e-01, -1.2345e+00], + [-2.8679e+00, 2.7748e+00], + [-3.0292e+00, 2.8109e+00], + [-2.8359e+00, 2.7646e+00], + [-3.8618e+00, 3.6205e+00], + [ 1.0225e+00, -1.1376e+00], + [-3.8145e+00, 3.7468e+00], + [-3.6756e+00, 3.5652e+00], + [-4.2120e+00, 4.0512e+00], + [-3.4755e+00, 3.2893e+00], + [-3.3304e+00, 3.1055e+00], + [-2.5962e+00, 2.4720e+00], + [-3.0754e+00, 2.9707e+00], + [-1.1332e+00, 9.7717e-01], + [-1.9533e+00, 1.7172e+00], + [ 6.0620e-02, -4.7525e-02], + [ 3.2005e-02, -4.0473e-02], + [-5.5107e-01, 5.1599e-01], + [-2.3380e+00, 2.2337e+00], + [-1.0873e-01, -9.6624e-02], + [ 9.8432e-01, -1.1091e+00], + [-1.4201e+00, 1.3091e+00], + [-2.2724e+00, 1.9403e+00], + [ 1.1048e+00, -1.4524e+00], + [-3.0144e+00, 2.9174e+00], + [-2.6660e+00, 2.5777e+00], + [-2.6290e+00, 2.5226e+00], + [-3.7638e+00, 3.5986e+00], + [-3.0869e+00, 3.0135e+00], + [-2.4232e+00, 2.3226e+00], + [-2.9366e+00, 2.7581e+00], + [-3.8403e+00, 3.7177e+00], + [-3.3942e+00, 3.1876e+00], + [ 1.5369e-01, -3.1495e-01], + [ 2.0256e-01, -2.3239e-01], + [-5.4051e-01, 2.7810e-01], + [-5.6944e-02, -8.8551e-02], + [ 1.1115e+00, -1.5334e+00], + [-4.2156e+00, 4.1458e+00], + [-2.7892e+00, 2.7236e+00], + [-1.4672e+00, 1.3221e+00], + [ 9.4561e-01, -9.7477e-01], + [-3.5275e+00, 3.2769e+00], + [-1.0237e+00, 7.7703e-01], + [-3.4628e+00, 3.2377e+00], + [-2.1660e+00, 1.9436e+00], + [ 3.5809e-01, -2.9870e-01], + [-1.9874e+00, 1.8754e+00], + [-2.2554e+00, 1.9687e+00], + [ 1.0599e+00, -1.3770e+00], + [ 6.3474e-01, -9.8507e-01], + [-3.5356e+00, 3.3793e+00], + [ 1.0083e+00, -1.2007e+00], + [-3.5972e+00, 3.4849e+00], + [-3.8987e+00, 3.8193e+00], + [-3.9553e+00, 3.8329e+00], + [-4.0172e+00, 3.9637e+00], + [-3.7685e+00, 3.7495e+00], + [-4.0129e+00, 3.9519e+00], + [-8.7708e-02, -8.6437e-02], + [-2.3099e+00, 2.2574e+00], + [ 1.2686e+00, -1.4877e+00], + [-3.5023e+00, 3.2941e+00], + [-1.7642e+00, 1.6789e+00], + [-3.2458e-01, 2.0938e-02], + [-6.0590e-01, 4.8391e-01], + [-1.1018e+00, 8.4065e-01], + [-3.2594e+00, 3.2459e+00], + [-3.4217e+00, 3.2272e+00], + [-1.5661e-01, 2.2487e-01], + [-3.7656e+00, 3.5994e+00], + [ 9.5303e-01, -1.3184e+00], + [-1.2543e-01, 6.5026e-02], + [ 9.8721e-01, -1.1735e+00], + [-4.0555e+00, 3.9056e+00], + [-1.9692e+00, 1.8899e+00], + [-2.6862e+00, 2.5646e+00], + [ 5.5243e-01, -4.7917e-01], + [ 8.0536e-01, -9.8671e-01], + [-2.9995e+00, 2.8010e+00], + [-3.2016e+00, 3.0513e+00], + [-2.3903e+00, 2.2648e+00], + [-4.0723e+00, 3.9267e+00], + [-3.9343e+00, 3.8105e+00], + [-3.2129e+00, 3.0234e+00], + [ 4.5618e-01, -4.3991e-01], + [-1.6907e+00, 1.5473e+00], + [-3.3566e+00, 3.2387e+00], + [ 9.9254e-01, -1.1633e+00], + [-4.0297e+00, 3.8663e+00], + [ 9.8470e-01, -9.9853e-01], + [-7.9857e-01, 6.1403e-01], + [-3.3100e+00, 3.0769e+00], + [-4.0117e+00, 3.9449e+00], + [-6.4701e-02, -2.0454e-01], + [ 5.4017e-01, -8.0635e-01], + [-3.9961e+00, 3.9616e+00], + [ 4.7159e-01, -7.1592e-01], + [-1.0365e+00, 8.5390e-01], + [-2.6376e+00, 2.6224e+00], + [ 5.0885e-01, -6.0054e-01], + [-6.9661e-01, 3.6839e-01], + [ 7.2284e-01, -9.4545e-01], + [ 1.6143e+00, -1.9611e+00], + [ 1.8954e+00, -1.9980e+00], + [-2.3891e+00, 2.1879e+00], + [ 1.5983e+00, -1.8320e+00], + [-4.0767e+00, 4.0257e+00], + [-2.2134e+00, 2.0548e+00], + [-3.4196e+00, 3.3249e+00], + [-3.3706e+00, 3.2165e+00], + [-7.2324e-01, 5.3137e-01], + [-3.1239e+00, 3.0487e+00], + [-3.8725e+00, 3.7811e+00], + [-8.4527e-02, -1.5254e-01], + [-8.6042e-01, 6.6018e-01], + [-3.8780e+00, 3.7285e+00], + [-2.0091e+00, 1.9042e+00], + [-3.8768e+00, 3.7745e+00], + [-2.3514e+00, 2.2935e+00], + [-5.3248e-01, 4.1851e-01], + [-2.4760e+00, 2.3895e+00], + [-3.3932e+00, 3.2330e+00], + [-3.1790e+00, 3.1051e+00], + [ 1.5831e+00, -1.7521e+00], + [-1.2612e+00, 1.2058e+00], + [-3.3421e+00, 3.2694e+00], + [-3.8899e+00, 3.7391e+00], + [-2.3868e+00, 2.2096e+00], + [-4.1486e+00, 4.0194e+00], + [-1.9248e+00, 1.7171e+00], + [-3.1648e+00, 2.9844e+00], + [ 7.4779e-01, -8.6561e-01], + [-3.3155e+00, 3.0295e+00], + [-1.6917e+00, 1.5792e+00], + [-2.8127e+00, 2.7328e+00], + [ 1.4952e+00, -1.6609e+00], + [-3.6585e-01, 2.8790e-01], + [-3.4291e+00, 3.3053e+00], + [-1.4287e+00, 1.3111e+00], + [-1.2208e+00, 1.0665e+00], + [-4.0275e+00, 3.9030e+00], + [ 7.4549e-01, -9.9983e-01], + [-2.8342e+00, 2.5836e+00], + [-4.2766e+00, 4.1407e+00], + [ 7.1180e-01, -9.3664e-01], + [-3.1609e+00, 2.8682e+00], + [-3.2141e+00, 3.1217e+00], + [-2.9947e+00, 2.7765e+00], + [-2.4011e+00, 2.2994e+00], + [ 2.3772e-01, -3.9398e-01], + [ 1.1563e+00, -1.4251e+00], + [-2.3859e+00, 2.2818e+00], + [-4.1857e+00, 4.1085e+00], + [ 4.4482e-01, -7.0528e-01], + [-6.4559e-01, 4.1624e-01], + [-9.1902e-02, -1.0583e-01], + [ 3.3179e-01, -5.1156e-01], + [-1.6941e+00, 1.5063e+00], + [ 8.5735e-01, -1.1210e+00], + [-8.3207e-01, 6.5489e-01], + [-4.0485e+00, 3.8666e+00], + [-2.6697e+00, 2.5023e+00], + [-3.9857e+00, 3.8676e+00], + [-3.4312e-01, 1.9057e-01], + [-2.9514e+00, 2.8785e+00], + [-2.1955e+00, 1.9552e+00], + [ 1.2296e+00, -1.3023e+00], + [-3.6418e+00, 3.5154e+00], + [ 7.7138e-01, -8.8921e-01], + [-2.6554e+00, 2.5201e+00], + [ 3.3418e-01, -4.4276e-01], + [-3.9592e+00, 3.8975e+00], + [-1.7124e+00, 1.5487e+00], + [-3.0487e+00, 2.8793e+00], + [-3.2195e+00, 3.1194e+00], + [-3.4793e+00, 3.4144e+00], + [-1.7957e+00, 1.6519e+00], + [-4.2500e+00, 4.1499e+00], + [-1.9273e+00, 1.8729e+00], + [-1.2646e+00, 1.0958e+00], + [-2.8058e+00, 2.5467e+00], + [ 1.2609e+00, -1.2041e+00], + [-4.0038e+00, 3.7990e+00], + [-6.3741e-01, 3.0747e-01], + [ 1.1458e+00, -1.0680e+00], + [-2.0963e+00, 1.6576e+00], + [-4.0101e+00, 3.8792e+00], + [ 2.1436e-01, -2.0278e-01], + [-5.0683e-01, 3.2650e-01]], device='cuda:0') +06/01/2024 12:54:45 - INFO - __main__ - tensor([[[ 5.1989, 3.9739], + [ 3.9739, 5.1945]], + + [[ 2.1275, 1.0994], + [ 1.0994, 2.1435]], + + [[ 2.9471, 0.1891], + [ 0.1891, 2.8743]], + + ..., + + [[ 5.2022, 4.1160], + [ 4.1160, 5.2208]], + + [[ 2.9550, 0.4178], + [ 0.4178, 2.9668]], + + [[11.0361, -8.0194], + [-8.0194, 12.0281]]], device='cuda:0') +06/01/2024 12:54:45 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:54:49 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:54:49 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:54:50 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:54:51 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/adapter_config.json +06/01/2024 12:54:51 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 12:54:51 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_adapter.bin +06/01/2024 12:54:51 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/head_config.json +06/01/2024 12:54:51 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:54:51 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_model_head.bin +06/01/2024 12:54:51 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:54:51 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:54:51 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 12:54:51 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 12:54:51 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 12:54:51 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 12:54:59 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [0, 113, 7908, 2156, 1603, 197, 28, 342, 15, 3120, 14, 1170, 7, 860, 7, 21280, 3345, 11, 49, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2, 2, 113, 1603, 197, 28, 15, 3120, 14, 3881, 7, 21280, 3345, 11, 1603, 128, 29, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:54:59 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [0, 32110, 1637, 21, 5304, 23, 68, 41682, 4, 3248, 1589, 42100, 4, 2466, 41, 10671, 23, 10775, 5050, 2156, 519, 15199, 62, 7, 68, 41821, 4, 1096, 480, 10, 672, 45, 450, 187, 902, 158, 479, 2, 2, 32110, 1637, 21, 5304, 23, 68, 40156, 4, 3506, 1589, 40598, 4, 996, 41, 10671, 23, 321, 1497, 5050, 2156, 519, 385, 26587, 25, 239, 25, 68, 40598, 4, 1244, 480, 10, 672, 45, 450, 187, 902, 564, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:54:59 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [0, 104, 1290, 4, 610, 9153, 8, 3045, 4572, 1981, 30790, 7, 1994, 479, 2, 2, 133, 117, 12, 35624, 58, 12274, 4, 610, 9153, 9, 5517, 8, 3045, 4572, 9, 1261, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:57:20 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 12:57:20 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 12:57:20 - INFO - __main__ - tensor([[-5.3736, 5.3006], + [ 2.2872, -2.7834], + [-0.6584, 0.6253], + [-0.6374, 0.4587], + [ 2.7166, -3.1182], + [-5.0236, 4.8861], + [-3.2228, 3.0483], + [-2.1785, 2.1034], + [-4.8119, 4.7141], + [-4.9463, 4.7759], + [-4.7171, 4.6535], + [ 2.3597, -2.4690], + [ 0.5142, -0.6503], + [-3.2537, 3.0616], + [-5.3618, 5.3512], + [ 0.1014, -0.4018], + [-5.2692, 5.1701], + [ 1.3555, -1.4856], + [-4.4075, 4.3462], + [-0.0646, -0.1540], + [ 1.7768, -2.0618], + [-1.4305, 1.0668], + [ 1.2959, -1.5106], + [-5.4215, 5.3911], + [-4.3594, 4.3252], + [-1.7461, 1.3538], + [ 0.9491, -1.3259], + [-5.3890, 5.3814], + [-3.1926, 2.9408], + [-4.3661, 4.2044], + [ 0.0068, -0.1263], + [-4.3821, 4.3091], + [-2.8416, 2.5867], + [-3.5153, 3.3304], + [-5.4966, 5.3760], + [-1.5279, 1.2343], + [ 2.3113, -2.5551], + [ 2.0101, -1.9096], + [-3.6939, 3.5063], + [-5.3971, 5.3891], + [ 1.1695, -1.3517], + [-5.2282, 5.1878], + [ 1.9945, -2.2894], + [ 2.1959, -2.5837], + [ 0.1097, -0.4731], + [-5.2057, 5.0943], + [-4.2104, 4.0708], + [ 2.7618, -3.0878], + [-5.3800, 5.2308], + [-5.1665, 5.0184], + [-2.8774, 2.5387], + [-4.9566, 4.7509], + [-5.0906, 4.8876], + [-5.4724, 5.3591], + [-1.1181, 1.1090], + [-4.7537, 4.6717], + [-2.5273, 2.2793], + [-5.3232, 5.2350], + [-5.3411, 5.2195], + [-4.7528, 4.6223], + [ 1.7827, -2.2593], + [ 0.7463, -0.7383], + [-4.5633, 4.4703], + [-5.1799, 5.0626], + [-0.9608, 0.6094], + [ 1.0572, -1.1490], + [-5.6218, 5.5216], + [-5.1774, 5.0863], + [-0.1825, 0.1610], + [-5.4332, 5.2667], + [-4.9733, 4.9127], + [-3.4882, 3.3374], + [-4.5032, 4.3214], + [-4.4881, 4.2835], + [-1.9617, 1.7708], + [-4.9770, 4.8749], + [-4.6932, 4.6168], + [-4.5267, 4.4147], + [-5.4121, 5.3509], + [-5.2194, 5.0963], + [-4.1470, 3.9119], + [-3.6898, 3.6354], + [-4.8561, 4.5699], + [ 2.1600, -2.5325], + [-4.8492, 4.7816], + [-2.2829, 1.9143], + [-4.7310, 4.4983], + [ 1.4244, -1.7941], + [-5.4959, 5.3112], + [-5.2252, 5.1030], + [ 2.0956, -2.5949], + [-4.9722, 4.9865], + [-5.0410, 4.8815], + [ 2.8260, -2.8405], + [-4.8326, 4.6785], + [-5.3537, 5.2980], + [ 1.4826, -1.9502], + [-4.7536, 4.7948], + [-1.3380, 1.0651], + [-4.4968, 4.2019], + [-4.9074, 4.6970], + [ 1.6935, -1.6562], + [-5.3151, 5.2502], + [-5.2251, 5.1311], + [ 2.3718, -2.7093], + [-5.3355, 5.1984], + [ 0.2124, -0.4818], + [ 1.8977, -2.1790], + [ 2.7792, -3.0773], + [-1.5291, 1.1664], + [-2.5448, 2.5009], + [-2.5649, 2.5219], + [-4.1906, 4.0604], + [-5.2997, 5.2257], + [ 0.3829, -0.7650], + [ 0.0389, -0.3427], + [-3.7758, 3.5238], + [-4.2751, 4.1654], + [-4.8480, 4.8888], + [-5.3666, 5.2642], + [-4.2300, 4.1368], + [ 1.6581, -2.1386], + [ 1.4481, -1.7483], + [-4.7447, 4.5460], + [-5.4130, 5.3102], + [-5.4134, 5.3904], + [-5.2518, 5.1089], + [ 1.1422, -1.5575], + [-5.3163, 5.2653], + [-5.3251, 5.2721], + [-2.5262, 2.4552], + [ 0.0818, -0.4636], + [-5.1794, 5.0334], + [ 0.1167, -0.3939], + [-0.7373, 0.4313], + [-4.9334, 4.7697], + [ 1.9452, -2.2079], + [ 0.9849, -1.1734], + [-5.2309, 5.1817], + [-4.4464, 4.2512], + [-4.5016, 4.4019], + [ 1.9066, -2.1856], + [ 1.2186, -1.5020], + [-5.3307, 5.2049], + [ 2.9086, -3.1066], + [ 0.5495, -0.8320], + [-5.0943, 4.9740], + [-1.9978, 1.6029], + [-1.5371, 1.2000], + [-4.6419, 4.6111], + [ 1.7336, -1.6483], + [-0.1565, -0.0989], + [-5.2012, 5.0457], + [-4.3368, 4.2513], + [-2.3712, 2.1656], + [-5.0778, 4.9473], + [-5.5302, 5.3280], + [ 0.5319, -0.6708], + [ 0.4968, -0.8700], + [-4.9466, 4.8804], + [-2.0614, 1.8108], + [-5.0356, 4.8731], + [-4.5159, 4.4413], + [-5.5832, 5.4699], + [-3.5433, 3.2477], + [-5.4328, 5.3490], + [-3.9025, 3.6807], + [ 1.5487, -2.0221], + [-4.2941, 4.2322], + [ 3.2241, -3.5662], + [ 0.5278, -0.7932], + [-0.1187, -0.1613], + [-0.9902, 0.9291], + [-4.1201, 3.7997], + [ 1.4608, -1.7465], + [-5.0560, 4.8131], + [-5.3057, 5.2066], + [ 1.8911, -2.1035], + [-3.0180, 2.7847], + [-5.3616, 5.1882], + [-0.1083, -0.2476], + [-3.5614, 3.3308], + [-4.1346, 4.0304], + [-5.1763, 5.0403], + [-1.6879, 1.7033], + [-3.9588, 3.9596], + [ 1.0710, -1.3351], + [-4.7580, 4.6708], + [ 2.6418, -3.0318], + [-2.4831, 2.1812], + [-5.3106, 5.2868], + [ 2.7044, -3.0607], + [-2.9981, 2.6847], + [-4.4553, 4.3152], + [ 1.5794, -1.7120], + [-4.4518, 4.3450], + [-5.2975, 5.2018], + [ 1.5884, -1.9108], + [-4.1467, 3.8522], + [-2.5668, 2.2545], + [-5.3732, 5.2768], + [ 0.9417, -1.1033], + [-2.2537, 2.2060], + [-2.0587, 1.8840], + [-5.2914, 5.2601], + [ 1.7049, -1.8579], + [-0.8961, 0.7272], + [-5.0656, 4.9562], + [ 0.3881, -0.4230], + [ 0.2459, -0.6707], + [ 0.7263, -0.8717], + [-4.6653, 4.5032], + [-4.0686, 3.8106], + [-3.8002, 3.6660], + [-4.8141, 4.7286], + [-5.4286, 5.2891], + [-3.9270, 3.7896], + [-4.5757, 4.4246], + [ 1.7522, -2.0993], + [-3.9115, 3.7682], + [-1.1924, 0.9140], + [-4.7773, 4.7160], + [-5.5464, 5.3586], + [ 1.3460, -1.5134], + [-4.8600, 4.8594], + [-5.2914, 5.2414], + [-5.4366, 5.3092], + [-4.9310, 4.7161], + [-5.0788, 4.8883], + [-4.7909, 4.6053], + [-4.5526, 4.4284], + [-1.7673, 1.6069], + [-4.7637, 4.5589], + [ 1.4361, -1.4610], + [ 1.0487, -1.0951], + [-0.2894, 0.1584], + [ 0.1621, -0.3796], + [-0.6064, 0.2862], + [ 1.1852, -1.4800], + [-3.4007, 3.3194], + [-4.5465, 4.2681], + [ 1.5047, -1.9644], + [-4.5634, 4.4502], + [-3.1839, 3.0367], + [-0.9798, 0.7210], + [-4.6246, 4.4217], + [-4.8305, 4.7310], + [-1.4328, 1.3288], + [-4.6868, 4.5132], + [-5.4824, 5.3810], + [-4.3804, 4.1493], + [ 1.4734, -1.7738], + [ 1.1803, -1.2532], + [-0.7547, 0.3448], + [ 0.3523, -0.6520], + [ 2.2237, -2.7406], + [-5.4250, 5.3719], + [-4.2707, 4.1597], + [-1.1373, 0.8702], + [ 1.7237, -1.8180], + [-5.0382, 4.7936], + [ 0.9057, -1.2806], + [-4.5256, 4.2627], + [-3.1156, 2.8748], + [ 0.3326, -0.3337], + [-2.1839, 1.9786], + [-2.7222, 2.3970], + [ 1.8974, -2.3429], + [ 1.4876, -1.9727], + [-4.5146, 4.3708], + [ 2.3954, -2.6754], + [-5.0627, 4.9468], + [-5.3173, 5.2598], + [-5.2605, 5.1576], + [-5.3189, 5.2437], + [-5.2753, 5.2838], + [-5.4837, 5.4415], + [-0.6704, 0.3994], + [-2.6689, 2.6544], + [ 1.5687, -1.9660], + [-4.8687, 4.6361], + [-3.3069, 3.2056], + [ 0.6386, -1.0469], + [ 1.3677, -1.5441], + [-1.8242, 1.4283], + [-5.3283, 5.2637], + [-5.2876, 5.1577], + [-2.0543, 1.9974], + [-5.3582, 5.2259], + [ 1.7639, -2.2456], + [ 1.1576, -1.2972], + [ 2.5401, -2.7982], + [-5.3790, 5.2618], + [-2.2362, 2.0118], + [-4.3978, 4.2312], + [ 1.7982, -1.7527], + [ 2.0552, -2.3572], + [-3.7264, 3.4971], + [-5.2670, 5.1268], + [-2.0381, 1.7943], + [-5.3207, 5.1594], + [-5.2429, 5.0855], + [-4.9135, 4.7521], + [ 1.2695, -1.3175], + [-3.9585, 3.7224], + [-4.5935, 4.5054], + [ 1.9261, -2.2010], + [-5.1763, 5.0687], + [ 2.2106, -2.3111], + [-1.8640, 1.6428], + [-0.9538, 0.5247], + [-5.1238, 5.0384], + [ 0.9809, -1.3532], + [ 1.3709, -1.6537], + [-5.3530, 5.3103], + [ 0.5490, -0.8835], + [-2.2016, 1.9747], + [-4.5046, 4.4421], + [ 1.3059, -1.4631], + [-0.0821, -0.3770], + [ 2.1809, -2.5434], + [ 3.1136, -3.5766], + [ 2.3630, -2.5453], + [-2.5980, 2.2754], + [ 2.5488, -2.8594], + [-5.0172, 4.9260], + [-3.5871, 3.3013], + [-4.6551, 4.5845], + [-4.9665, 4.8336], + [-1.1406, 0.8889], + [-4.4719, 4.3872], + [-5.2866, 5.1676], + [-0.0115, -0.2978], + [-1.5307, 1.2043], + [-5.3842, 5.2548], + [-1.9286, 1.8565], + [-5.0043, 4.9054], + [-4.4263, 4.3636], + [ 1.1074, -1.2032], + [-4.6638, 4.6524], + [-5.4375, 5.2966], + [-5.0582, 4.9674], + [ 2.1016, -2.3078], + [-0.3095, 0.1408], + [-4.4804, 4.4013], + [-5.4074, 5.2930], + [-3.4072, 3.1209], + [-5.2431, 5.1426], + [-0.5216, 0.2382], + [-5.1441, 5.0145], + [ 1.9230, -2.1931], + [-4.9303, 4.6517], + [-3.1907, 3.0026], + [-4.0419, 3.9254], + [ 3.2002, -3.5698], + [-0.7968, 0.6183], + [-4.6872, 4.5422], + [-1.1506, 0.9440], + [-2.0734, 1.8774], + [-5.4020, 5.3131], + [ 1.4389, -1.8207], + [-3.3812, 3.0678], + [-5.5051, 5.4142], + [ 2.1554, -2.5117], + [-5.3774, 5.1725], + [-4.6084, 4.5284], + [-4.4824, 4.2684], + [-4.5509, 4.4918], + [ 1.3454, -1.5172], + [ 2.0316, -2.3897], + [-4.4047, 4.2934], + [-5.3762, 5.2348], + [ 1.3558, -1.7324], + [-0.2455, -0.0799], + [ 1.0329, -1.4190], + [-0.1566, -0.1615], + [-0.4373, 0.1029], + [ 2.0490, -2.4594], + [-0.2487, 0.0247], + [-5.5579, 5.4211], + [-1.6774, 1.5057], + [-5.3170, 5.2193], + [ 0.8784, -1.1394], + [-4.3055, 4.2029], + [-4.1561, 3.9167], + [ 3.0639, -3.1944], + [-5.2119, 5.1519], + [ 2.1754, -2.4204], + [-4.2882, 4.1668], + [ 1.4439, -1.7359], + [-5.1182, 5.0372], + [-3.8394, 3.6048], + [-3.7242, 3.5168], + [-4.5691, 4.4886], + [-4.9471, 4.8977], + [-2.6470, 2.4313], + [-5.3534, 5.2739], + [-3.2213, 3.1546], + [-1.5790, 1.3426], + [-4.2918, 4.0026], + [ 0.4772, -0.6111], + [-5.2998, 5.1067], + [-1.6368, 1.2534], + [ 2.5561, -2.5009], + [-1.9489, 1.4872], + [-5.2119, 5.1260], + [-0.3398, 0.2687], + [-1.8023, 1.5567]], device='cuda:0') +06/01/2024 12:57:21 - INFO - __main__ - tensor([[[ 6.3402, 5.3455], + [ 5.3455, 6.3827]], + + [[ 3.6051, 1.0077], + [ 1.0077, 3.5836]], + + [[ 10.1834, -5.7185], + [ -5.7185, 9.8780]], + + ..., + + [[ 6.2140, 5.1206], + [ 5.1206, 6.2781]], + + [[ 5.3051, -1.2486], + [ -1.2486, 5.2526]], + + [[ 43.7870, -39.7742], + [-39.7742, 46.1740]]], device='cuda:0') +06/01/2024 12:57:21 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:57:25 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:57:25 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:57:26 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:57:27 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/adapter_config.json +06/01/2024 12:57:27 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 12:57:27 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_adapter.bin +06/01/2024 12:57:27 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/head_config.json +06/01/2024 12:57:27 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:57:27 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_model_head.bin +06/01/2024 12:57:27 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:27 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 12:57:28 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 12:57:28 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 12:57:28 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 12:57:28 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 12:57:28 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 12:57:34 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [0, 113, 7908, 2156, 1603, 197, 28, 342, 15, 3120, 14, 1170, 7, 860, 7, 21280, 3345, 11, 49, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2, 2, 113, 1603, 197, 28, 15, 3120, 14, 3881, 7, 21280, 3345, 11, 1603, 128, 29, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:57:34 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [0, 32110, 1637, 21, 5304, 23, 68, 41682, 4, 3248, 1589, 42100, 4, 2466, 41, 10671, 23, 10775, 5050, 2156, 519, 15199, 62, 7, 68, 41821, 4, 1096, 480, 10, 672, 45, 450, 187, 902, 158, 479, 2, 2, 32110, 1637, 21, 5304, 23, 68, 40156, 4, 3506, 1589, 40598, 4, 996, 41, 10671, 23, 321, 1497, 5050, 2156, 519, 385, 26587, 25, 239, 25, 68, 40598, 4, 1244, 480, 10, 672, 45, 450, 187, 902, 564, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:57:34 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [0, 104, 1290, 4, 610, 9153, 8, 3045, 4572, 1981, 30790, 7, 1994, 479, 2, 2, 133, 117, 12, 35624, 58, 12274, 4, 610, 9153, 9, 5517, 8, 3045, 4572, 9, 1261, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:59:58 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 12:59:58 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 12:59:58 - INFO - __main__ - tensor([[-6.2874e+00, 6.2160e+00], + [ 4.0009e+00, -4.4764e+00], + [-8.8825e-01, 8.5685e-01], + [-2.5351e+00, 2.3969e+00], + [ 5.5271e+00, -5.8317e+00], + [-5.9584e+00, 5.8229e+00], + [-2.0346e+00, 1.8476e+00], + [-4.3077e+00, 4.2137e+00], + [-4.5609e+00, 4.4859e+00], + [-6.1156e+00, 5.9972e+00], + [-5.5346e+00, 5.4571e+00], + [ 4.8452e+00, -4.9426e+00], + [ 1.6491e+00, -1.7750e+00], + [-4.9120e+00, 4.7569e+00], + [-5.8807e+00, 5.8858e+00], + [ 7.9716e-01, -1.1251e+00], + [-6.2462e+00, 6.1501e+00], + [ 3.5914e+00, -3.7168e+00], + [-5.2796e+00, 5.1887e+00], + [ 6.6961e-01, -8.7222e-01], + [ 3.1115e+00, -3.3902e+00], + [-3.6342e+00, 3.3532e+00], + [ 2.3492e+00, -2.5861e+00], + [-6.3683e+00, 6.3395e+00], + [-4.6561e+00, 4.5626e+00], + [-2.4963e+00, 2.0580e+00], + [ 8.2024e-01, -1.2578e+00], + [-6.3453e+00, 6.3124e+00], + [-2.7426e+00, 2.4396e+00], + [-5.4177e+00, 5.2593e+00], + [ 1.0397e+00, -1.1742e+00], + [-5.0910e+00, 5.0124e+00], + [-4.1328e+00, 3.9193e+00], + [-5.1044e+00, 4.9424e+00], + [-6.4583e+00, 6.3155e+00], + [-2.5699e+00, 2.2237e+00], + [ 3.5229e+00, -3.7433e+00], + [ 2.5949e+00, -2.4584e+00], + [-5.3265e+00, 5.1675e+00], + [-6.3806e+00, 6.3387e+00], + [ 1.3143e+00, -1.4060e+00], + [-6.0910e+00, 6.0546e+00], + [ 3.6652e+00, -3.9596e+00], + [ 4.2464e+00, -4.5772e+00], + [ 6.7680e-01, -1.0301e+00], + [-5.5790e+00, 5.4230e+00], + [-5.0746e+00, 4.9251e+00], + [ 4.3792e+00, -4.5660e+00], + [-6.2818e+00, 6.1799e+00], + [-6.2463e+00, 6.0832e+00], + [-3.4224e+00, 3.1240e+00], + [-5.9154e+00, 5.7197e+00], + [-5.8897e+00, 5.6745e+00], + [-6.4430e+00, 6.3314e+00], + [-1.6483e+00, 1.6134e+00], + [-5.2216e+00, 5.1230e+00], + [-3.6453e+00, 3.4413e+00], + [-6.2066e+00, 6.1122e+00], + [-6.2545e+00, 6.1599e+00], + [-5.5247e+00, 5.4120e+00], + [ 3.0801e+00, -3.5356e+00], + [ 1.0872e+00, -1.0935e+00], + [-5.7851e+00, 5.6860e+00], + [-6.0763e+00, 5.9916e+00], + [-2.5532e+00, 2.2575e+00], + [ 3.4003e+00, -3.4384e+00], + [-6.6510e+00, 6.5784e+00], + [-6.2224e+00, 6.1302e+00], + [-3.0962e-01, 2.5376e-01], + [-6.4117e+00, 6.2407e+00], + [-6.0928e+00, 6.0800e+00], + [-4.5066e+00, 4.3938e+00], + [-5.4348e+00, 5.2549e+00], + [-4.9661e+00, 4.7714e+00], + [-4.5745e+00, 4.4937e+00], + [-6.0149e+00, 5.9056e+00], + [-5.7146e+00, 5.6955e+00], + [-5.2824e+00, 5.1802e+00], + [-6.4505e+00, 6.3944e+00], + [-6.0143e+00, 5.8929e+00], + [-4.4445e+00, 4.2209e+00], + [-4.6114e+00, 4.5207e+00], + [-6.1938e+00, 5.9531e+00], + [ 3.8542e+00, -4.2499e+00], + [-4.6118e+00, 4.4703e+00], + [-9.4626e-01, 5.3366e-01], + [-5.6809e+00, 5.4627e+00], + [ 1.8231e+00, -2.1726e+00], + [-6.5149e+00, 6.3357e+00], + [-6.0833e+00, 5.9410e+00], + [ 4.3573e+00, -4.7921e+00], + [-5.7732e+00, 5.7953e+00], + [-5.5889e+00, 5.4201e+00], + [ 5.8947e+00, -5.9908e+00], + [-4.6114e+00, 4.4582e+00], + [-6.2523e+00, 6.2044e+00], + [ 2.2723e+00, -2.6705e+00], + [-5.3214e+00, 5.3846e+00], + [ 3.6452e-01, -6.6537e-01], + [-5.7185e+00, 5.4658e+00], + [-5.7781e+00, 5.5897e+00], + [ 3.3270e+00, -3.2774e+00], + [-6.4144e+00, 6.3320e+00], + [-6.1928e+00, 6.1001e+00], + [ 5.0777e+00, -5.3300e+00], + [-6.3152e+00, 6.1899e+00], + [ 5.3478e-01, -8.1930e-01], + [ 3.6419e+00, -3.8717e+00], + [ 4.8042e+00, -5.1315e+00], + [-3.4541e+00, 3.1162e+00], + [-3.2579e+00, 3.1784e+00], + [-3.9326e+00, 3.8649e+00], + [-5.1714e+00, 5.0337e+00], + [-6.2386e+00, 6.1579e+00], + [ 2.0848e+00, -2.4975e+00], + [ 9.5129e-01, -1.2772e+00], + [-4.7000e+00, 4.4649e+00], + [-5.0940e+00, 4.9762e+00], + [-5.6226e+00, 5.6569e+00], + [-6.3184e+00, 6.2100e+00], + [-5.1079e+00, 5.0096e+00], + [ 3.1100e+00, -3.5417e+00], + [ 2.4076e+00, -2.6890e+00], + [-5.4788e+00, 5.3066e+00], + [-6.3695e+00, 6.2591e+00], + [-6.4031e+00, 6.3839e+00], + [-5.7310e+00, 5.5636e+00], + [ 2.7399e+00, -3.1544e+00], + [-6.1183e+00, 6.0682e+00], + [-6.2554e+00, 6.2201e+00], + [-3.6509e+00, 3.5502e+00], + [ 1.3743e+00, -1.7972e+00], + [-6.0336e+00, 5.8914e+00], + [ 1.5798e-01, -4.0876e-01], + [-1.4188e+00, 1.1878e+00], + [-5.7872e+00, 5.6141e+00], + [ 3.5388e+00, -3.7725e+00], + [ 3.2245e+00, -3.3786e+00], + [-6.2958e+00, 6.2343e+00], + [-5.8212e+00, 5.6759e+00], + [-5.2870e+00, 5.1804e+00], + [ 2.8426e+00, -3.1212e+00], + [ 2.4939e+00, -2.7416e+00], + [-6.3807e+00, 6.2505e+00], + [ 5.1585e+00, -5.3531e+00], + [ 4.0663e-03, -2.2627e-01], + [-5.8513e+00, 5.7154e+00], + [-2.2997e+00, 1.8508e+00], + [-1.1417e+00, 7.5503e-01], + [-5.7588e+00, 5.7136e+00], + [ 4.0605e+00, -3.9716e+00], + [-1.0508e-01, -1.3914e-01], + [-5.9999e+00, 5.8703e+00], + [-4.8999e+00, 4.8206e+00], + [-3.0096e+00, 2.8145e+00], + [-5.9026e+00, 5.7353e+00], + [-6.4851e+00, 6.2482e+00], + [ 1.3007e+00, -1.3871e+00], + [ 6.1867e-01, -9.8842e-01], + [-6.0695e+00, 6.0014e+00], + [-1.4614e+00, 1.1747e+00], + [-5.6924e+00, 5.5154e+00], + [-5.2558e+00, 5.1817e+00], + [-6.6211e+00, 6.4986e+00], + [-3.6209e+00, 3.2659e+00], + [-6.2096e+00, 6.1099e+00], + [-5.3633e+00, 5.1554e+00], + [ 3.6228e+00, -4.1739e+00], + [-5.1701e+00, 5.0872e+00], + [ 5.3668e+00, -5.6337e+00], + [ 1.5560e+00, -1.7808e+00], + [ 4.5955e-01, -6.3361e-01], + [-6.1977e-01, 6.4305e-01], + [-3.7292e+00, 3.3593e+00], + [ 9.1146e-01, -1.1372e+00], + [-5.7943e+00, 5.5833e+00], + [-6.1824e+00, 6.0714e+00], + [ 3.8409e+00, -4.1158e+00], + [-2.8293e+00, 2.5713e+00], + [-6.2820e+00, 6.1221e+00], + [ 2.1955e+00, -2.6212e+00], + [-3.2266e+00, 2.8721e+00], + [-5.0968e+00, 5.0028e+00], + [-6.0779e+00, 5.9233e+00], + [-3.8512e+00, 3.9176e+00], + [-5.0014e+00, 5.0013e+00], + [ 2.1040e+00, -2.3936e+00], + [-5.3123e+00, 5.2155e+00], + [ 4.4573e+00, -4.8410e+00], + [-3.6718e+00, 3.3971e+00], + [-6.1376e+00, 6.1240e+00], + [ 4.5942e+00, -4.8556e+00], + [-4.1438e+00, 3.8500e+00], + [-5.3491e+00, 5.1796e+00], + [ 2.1753e+00, -2.2532e+00], + [-5.0372e+00, 4.9325e+00], + [-6.2271e+00, 6.1174e+00], + [ 3.8166e+00, -4.1541e+00], + [-4.0260e+00, 3.6867e+00], + [-1.4175e+00, 1.0841e+00], + [-6.3752e+00, 6.2978e+00], + [ 4.0581e+00, -4.2570e+00], + [-4.1490e+00, 4.0136e+00], + [-3.0469e+00, 2.8654e+00], + [-6.2446e+00, 6.2375e+00], + [ 2.5450e+00, -2.6375e+00], + [-2.6554e+00, 2.4766e+00], + [-6.1111e+00, 6.0194e+00], + [ 2.6565e-01, -3.2163e-01], + [ 1.6415e+00, -2.0704e+00], + [ 6.4952e-01, -7.1964e-01], + [-5.9056e+00, 5.7600e+00], + [-5.4589e+00, 5.2386e+00], + [-4.6662e+00, 4.5159e+00], + [-4.9310e+00, 4.8591e+00], + [-6.1779e+00, 5.9564e+00], + [-4.9608e+00, 4.8448e+00], + [-5.3679e+00, 5.2034e+00], + [ 2.6785e+00, -2.9744e+00], + [-4.3698e+00, 4.2021e+00], + [-1.8390e+00, 1.5726e+00], + [-4.9429e+00, 4.8989e+00], + [-6.5854e+00, 6.4275e+00], + [ 2.1100e+00, -2.2649e+00], + [-4.4500e+00, 4.3824e+00], + [-5.8317e+00, 5.7285e+00], + [-6.4296e+00, 6.2829e+00], + [-5.5732e+00, 5.3220e+00], + [-5.9753e+00, 5.7727e+00], + [-6.0348e+00, 5.8609e+00], + [-5.3643e+00, 5.2514e+00], + [-3.3079e+00, 3.1683e+00], + [-5.2641e+00, 5.0666e+00], + [ 2.6770e+00, -2.7381e+00], + [ 1.5665e+00, -1.7122e+00], + [-6.1858e-02, -1.8116e-01], + [ 2.9100e-01, -4.7653e-01], + [ 1.7044e+00, -2.0288e+00], + [ 2.1723e+00, -2.4521e+00], + [-2.7954e+00, 2.6992e+00], + [-5.2467e+00, 4.9970e+00], + [ 2.4315e+00, -2.8794e+00], + [-5.4437e+00, 5.3158e+00], + [-4.5450e+00, 4.4083e+00], + [-3.0272e+00, 2.8441e+00], + [-5.1538e+00, 4.9619e+00], + [-6.3007e+00, 6.2097e+00], + [-2.1101e+00, 2.0584e+00], + [-5.4032e+00, 5.2325e+00], + [-6.6571e+00, 6.5451e+00], + [-5.3891e+00, 5.1794e+00], + [ 2.8210e+00, -3.0837e+00], + [ 2.3631e+00, -2.4655e+00], + [-2.1789e+00, 1.8181e+00], + [ 8.5883e-01, -1.2032e+00], + [ 4.5224e+00, -4.9698e+00], + [-6.4094e+00, 6.3458e+00], + [-5.0381e+00, 4.8988e+00], + [-1.8624e+00, 1.5935e+00], + [ 3.0141e+00, -3.0895e+00], + [-5.6942e+00, 5.4448e+00], + [ 3.0791e+00, -3.4243e+00], + [-5.6844e+00, 5.4822e+00], + [-4.6748e+00, 4.4298e+00], + [ 2.3318e+00, -2.3273e+00], + [-2.6800e+00, 2.4860e+00], + [-8.3297e-01, 4.0377e-01], + [ 3.3706e+00, -3.8146e+00], + [ 1.8032e+00, -2.2752e+00], + [-5.1010e+00, 4.9386e+00], + [ 3.8340e+00, -4.0736e+00], + [-5.8076e+00, 5.7037e+00], + [-6.0096e+00, 5.9226e+00], + [-6.1265e+00, 6.0374e+00], + [-6.2084e+00, 6.1153e+00], + [-6.2529e+00, 6.2554e+00], + [-6.4182e+00, 6.3582e+00], + [-2.6303e-02, -2.2518e-01], + [-1.0662e+00, 1.0356e+00], + [ 2.7816e+00, -3.0928e+00], + [-5.6772e+00, 5.4185e+00], + [-4.3654e+00, 4.2639e+00], + [ 5.2346e-01, -9.4416e-01], + [ 2.5848e+00, -2.7857e+00], + [-2.2078e+00, 1.7555e+00], + [-6.3804e+00, 6.2917e+00], + [-6.2900e+00, 6.1758e+00], + [-2.9573e+00, 2.9158e+00], + [-5.6866e+00, 5.5142e+00], + [ 3.3887e+00, -3.8933e+00], + [ 2.3828e+00, -2.4947e+00], + [ 4.7735e+00, -4.9336e+00], + [-6.2424e+00, 6.0720e+00], + [-4.5676e+00, 4.3198e+00], + [-4.9250e+00, 4.7828e+00], + [ 3.6975e+00, -3.6579e+00], + [ 4.1469e+00, -4.4277e+00], + [-4.7881e+00, 4.5752e+00], + [-6.0472e+00, 5.8987e+00], + [-4.0864e+00, 3.8875e+00], + [-6.2703e+00, 6.1066e+00], + [-6.0265e+00, 5.8327e+00], + [-5.3834e+00, 5.1776e+00], + [ 3.4590e+00, -3.5396e+00], + [-3.5687e+00, 3.3392e+00], + [-5.5561e+00, 5.4572e+00], + [ 3.4088e+00, -3.6858e+00], + [-5.9194e+00, 5.8026e+00], + [ 3.4596e+00, -3.5180e+00], + [-3.4839e+00, 3.2617e+00], + [-9.0101e-01, 4.4316e-01], + [-5.9909e+00, 5.8808e+00], + [ 1.8765e+00, -2.2181e+00], + [ 2.4079e+00, -2.6696e+00], + [-6.3414e+00, 6.2907e+00], + [ 1.0848e+00, -1.4158e+00], + [-4.3013e+00, 4.0937e+00], + [-5.4244e+00, 5.3528e+00], + [ 2.1004e+00, -2.2383e+00], + [-1.4952e-02, -4.6850e-01], + [ 4.0057e+00, -4.3413e+00], + [ 5.3950e+00, -5.7828e+00], + [ 4.4737e+00, -4.6341e+00], + [-4.0051e+00, 3.7037e+00], + [ 4.1661e+00, -4.4114e+00], + [-5.9082e+00, 5.7984e+00], + [-4.3804e+00, 4.0795e+00], + [-5.5294e+00, 5.4457e+00], + [-6.0223e+00, 5.8685e+00], + [-1.7209e+00, 1.4374e+00], + [-5.2210e+00, 5.1136e+00], + [-6.2371e+00, 6.1107e+00], + [ 2.7556e-01, -5.9587e-01], + [-1.5694e+00, 1.2248e+00], + [-6.2103e+00, 6.0836e+00], + [-1.6801e+00, 1.5871e+00], + [-6.0223e+00, 5.9192e+00], + [-5.4106e+00, 5.3468e+00], + [ 1.6464e+00, -1.6964e+00], + [-5.4792e+00, 5.4601e+00], + [-6.3357e+00, 6.2079e+00], + [-4.8902e+00, 4.7683e+00], + [ 3.5175e+00, -3.6915e+00], + [-4.5768e-01, 2.7240e-01], + [-5.3691e+00, 5.2736e+00], + [-6.3532e+00, 6.2424e+00], + [-3.4611e+00, 3.1264e+00], + [-6.1113e+00, 5.9910e+00], + [ 1.1682e+00, -1.4289e+00], + [-6.1898e+00, 6.0442e+00], + [ 3.3408e+00, -3.6012e+00], + [-5.5518e+00, 5.2963e+00], + [-3.0999e+00, 2.9342e+00], + [-5.0409e+00, 4.9164e+00], + [ 5.4044e+00, -5.7082e+00], + [-1.6926e+00, 1.4900e+00], + [-5.1217e+00, 4.9528e+00], + [-1.4175e-01, -9.9771e-02], + [-2.0915e+00, 1.8353e+00], + [-6.3781e+00, 6.2617e+00], + [ 2.2637e+00, -2.6279e+00], + [-3.7407e+00, 3.4564e+00], + [-6.4203e+00, 6.3104e+00], + [ 5.1145e+00, -5.4413e+00], + [-6.2750e+00, 6.0321e+00], + [-5.4953e+00, 5.3943e+00], + [-5.2325e+00, 5.0207e+00], + [-4.7319e+00, 4.6732e+00], + [ 1.3976e+00, -1.5562e+00], + [ 2.7623e+00, -3.0862e+00], + [-5.4230e+00, 5.3181e+00], + [-6.3043e+00, 6.1805e+00], + [ 3.0900e+00, -3.4433e+00], + [-1.7276e+00, 1.3865e+00], + [ 1.0523e+00, -1.4661e+00], + [ 2.6113e-01, -5.5238e-01], + [ 4.1469e-01, -7.5665e-01], + [ 3.7684e+00, -4.1623e+00], + [-4.4223e-01, 2.1983e-01], + [-6.5276e+00, 6.4035e+00], + [-2.6611e+00, 2.4681e+00], + [-6.1965e+00, 6.0986e+00], + [ 2.0654e+00, -2.2475e+00], + [-5.2254e+00, 5.1067e+00], + [-4.4821e+00, 4.2204e+00], + [ 5.0581e+00, -5.1413e+00], + [-5.4230e+00, 5.2602e+00], + [ 4.5160e+00, -4.7380e+00], + [-4.9740e+00, 4.8507e+00], + [ 2.5495e+00, -2.8225e+00], + [-6.0080e+00, 5.9453e+00], + [-5.7377e+00, 5.5855e+00], + [-5.5522e+00, 5.3600e+00], + [-5.4256e+00, 5.3197e+00], + [-5.7030e+00, 5.6236e+00], + [-3.4044e+00, 3.1590e+00], + [-6.3333e+00, 6.2543e+00], + [-4.4363e+00, 4.3641e+00], + [-2.6110e+00, 2.3662e+00], + [-5.5526e+00, 5.2987e+00], + [ 1.5681e+00, -1.7027e+00], + [-6.2631e+00, 6.0454e+00], + [-5.2819e-01, -9.7543e-03], + [ 5.2009e+00, -5.1764e+00], + [-3.5838e+00, 3.1243e+00], + [-6.1183e+00, 6.0437e+00], + [ 8.8677e-02, -1.4895e-01], + [-2.7564e+00, 2.4522e+00]], device='cuda:0') +06/01/2024 12:59:58 - INFO - __main__ - tensor([[[ 8.3474, 6.4208], + [ 6.4208, 8.4517]], + + [[ 9.3791, -1.3512], + [ -1.3512, 9.0050]], + + [[ 45.6067, -38.4991], + [-38.4991, 43.9185]], + + ..., + + [[ 8.2655, 6.1628], + [ 6.1628, 8.3855]], + + [[ 13.4330, -8.4344], + [ -8.4344, 13.2881]], + + [[ 83.7905, -79.7850], + [-79.7850, 90.2814]]], device='cuda:0') +06/01/2024 12:59:58 - INFO - __main__ - ***** Completed training ***** +06/01/2024 13:00:02 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 13:00:02 - INFO - __main__ - ***** Starting script ***** +06/01/2024 13:00:03 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 13:00:04 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/adapter_config.json +06/01/2024 13:00:04 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 13:00:04 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_adapter.bin +06/01/2024 13:00:04 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/head_config.json +06/01/2024 13:00:04 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 13:00:04 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_model_head.bin +06/01/2024 13:00:04 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:00:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:00:04 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 13:00:04 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 13:00:04 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 13:00:04 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 13:00:14 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [0, 113, 7908, 2156, 1603, 197, 28, 342, 15, 3120, 14, 1170, 7, 860, 7, 21280, 3345, 11, 49, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2, 2, 113, 1603, 197, 28, 15, 3120, 14, 3881, 7, 21280, 3345, 11, 1603, 128, 29, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 13:00:14 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [0, 32110, 1637, 21, 5304, 23, 68, 41682, 4, 3248, 1589, 42100, 4, 2466, 41, 10671, 23, 10775, 5050, 2156, 519, 15199, 62, 7, 68, 41821, 4, 1096, 480, 10, 672, 45, 450, 187, 902, 158, 479, 2, 2, 32110, 1637, 21, 5304, 23, 68, 40156, 4, 3506, 1589, 40598, 4, 996, 41, 10671, 23, 321, 1497, 5050, 2156, 519, 385, 26587, 25, 239, 25, 68, 40598, 4, 1244, 480, 10, 672, 45, 450, 187, 902, 564, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 13:00:14 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [0, 104, 1290, 4, 610, 9153, 8, 3045, 4572, 1981, 30790, 7, 1994, 479, 2, 2, 133, 117, 12, 35624, 58, 12274, 4, 610, 9153, 9, 5517, 8, 3045, 4572, 9, 1261, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 13:02:40 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 13:02:40 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 13:02:40 - INFO - __main__ - tensor([[-6.3946, 6.3121], + [ 4.1245, -4.6523], + [-0.0680, 0.0620], + [-2.4345, 2.2647], + [ 5.4923, -5.8045], + [-6.3140, 6.1743], + [-2.6869, 2.4954], + [-3.8650, 3.7547], + [-5.5031, 5.4305], + [-5.9370, 5.7893], + [-5.7297, 5.6522], + [ 5.0183, -5.1314], + [ 1.7253, -1.8370], + [-4.2818, 4.1106], + [-6.0267, 6.0136], + [ 1.0139, -1.2917], + [-6.3529, 6.2341], + [ 4.4310, -4.5544], + [-5.3255, 5.2430], + [ 0.8822, -1.0922], + [ 3.3461, -3.6517], + [-3.3938, 3.1003], + [ 1.7041, -1.9551], + [-6.4858, 6.4539], + [-4.2305, 4.0980], + [-3.3267, 2.9308], + [ 0.9264, -1.3026], + [-6.4703, 6.4379], + [-2.8747, 2.5392], + [-5.1873, 5.0164], + [ 1.8911, -2.0292], + [-5.3550, 5.2761], + [-3.8343, 3.5998], + [-4.5049, 4.3448], + [-6.5221, 6.3860], + [-1.1705, 0.8171], + [ 4.7781, -4.9913], + [ 3.6866, -3.5562], + [-5.1728, 5.0324], + [-6.5134, 6.4690], + [ 1.2801, -1.4331], + [-6.3844, 6.3218], + [ 4.3444, -4.6539], + [ 4.1176, -4.4631], + [ 1.0988, -1.4137], + [-5.6423, 5.5031], + [-5.2300, 5.0917], + [ 3.7982, -4.0848], + [-6.5279, 6.4203], + [-6.4859, 6.3226], + [-4.6216, 4.3076], + [-6.1137, 5.9112], + [-6.2230, 6.0258], + [-6.5391, 6.4272], + [-3.0724, 3.0163], + [-5.8284, 5.7275], + [-3.5115, 3.2909], + [-6.3478, 6.2373], + [-6.3043, 6.1995], + [-5.3340, 5.2038], + [ 2.7555, -3.1997], + [ 1.7109, -1.7129], + [-6.0914, 6.0036], + [-6.1479, 6.0302], + [-3.3116, 3.0477], + [ 4.1665, -4.1918], + [-6.8240, 6.7573], + [-6.3349, 6.2404], + [ 1.2222, -1.2663], + [-6.5508, 6.3907], + [-5.9169, 5.8570], + [-4.3182, 4.2132], + [-5.4570, 5.2712], + [-5.5039, 5.3137], + [-4.6409, 4.5505], + [-6.2306, 6.1130], + [-6.0998, 6.0584], + [-5.5098, 5.4106], + [-6.5417, 6.4595], + [-6.0518, 5.9268], + [-4.9939, 4.7755], + [-4.7710, 4.6932], + [-6.3458, 6.1172], + [ 4.2841, -4.6478], + [-5.2290, 5.1111], + [-1.0441, 0.6621], + [-5.8760, 5.6557], + [-0.5413, 0.2168], + [-6.6407, 6.4561], + [-6.2932, 6.1368], + [ 5.4807, -5.8484], + [-5.9619, 5.9720], + [-5.6132, 5.4163], + [ 6.5427, -6.6635], + [-5.4347, 5.2796], + [-6.4536, 6.3841], + [ 3.2447, -3.6548], + [-5.2393, 5.2918], + [-0.1097, -0.1974], + [-6.2524, 6.0381], + [-5.6967, 5.5188], + [ 4.4034, -4.3808], + [-6.5470, 6.4593], + [-6.3818, 6.2547], + [ 5.3158, -5.5567], + [-6.3961, 6.2611], + [ 1.8893, -2.1728], + [ 3.6097, -3.8691], + [ 5.4576, -5.7847], + [-3.5218, 3.2095], + [-1.9607, 1.8528], + [-3.7534, 3.6665], + [-5.3929, 5.2470], + [-6.4158, 6.3356], + [ 2.6363, -3.0624], + [ 1.9993, -2.3289], + [-4.7110, 4.4369], + [-5.3091, 5.1998], + [-5.4645, 5.4629], + [-6.5304, 6.4091], + [-5.2447, 5.1523], + [ 3.7666, -4.2508], + [ 2.3746, -2.6603], + [-4.9690, 4.7971], + [-6.3141, 6.2063], + [-6.5577, 6.5322], + [-6.2077, 6.0634], + [ 0.3157, -0.7675], + [-6.2302, 6.1635], + [-6.4674, 6.4155], + [-2.9953, 2.9124], + [ 0.6241, -1.0185], + [-6.2491, 6.1034], + [ 0.5531, -0.8050], + [-1.7834, 1.4999], + [-5.8205, 5.6279], + [ 4.6380, -4.8593], + [ 3.9553, -4.1032], + [-6.4209, 6.3449], + [-5.8488, 5.7073], + [-5.4786, 5.3838], + [ 3.3222, -3.5855], + [ 3.0893, -3.3131], + [-6.5604, 6.4306], + [ 5.3941, -5.6002], + [-1.4275, 1.2282], + [-5.8365, 5.6981], + [-2.8419, 2.4361], + [-1.8131, 1.4505], + [-5.4760, 5.4216], + [ 4.4721, -4.4103], + [-0.0489, -0.1738], + [-6.2889, 6.1679], + [-5.0885, 4.9997], + [-3.4878, 3.3037], + [-6.1205, 5.9752], + [-6.6627, 6.4468], + [ 1.6571, -1.7818], + [ 1.4336, -1.8233], + [-6.2008, 6.1188], + [-1.7520, 1.4755], + [-5.8891, 5.7553], + [-5.4431, 5.3695], + [-6.8316, 6.7138], + [-4.4449, 4.1400], + [-6.4594, 6.3387], + [-5.8680, 5.6613], + [ 2.9240, -3.4563], + [-5.2758, 5.1957], + [ 5.7917, -6.0698], + [ 1.8041, -1.9844], + [ 0.5279, -0.8109], + [-0.7209, 0.6654], + [-4.7428, 4.4400], + [ 3.0756, -3.3496], + [-5.6770, 5.4406], + [-6.1395, 6.0423], + [ 4.0248, -4.2974], + [-3.5084, 3.2497], + [-6.4346, 6.2722], + [ 2.7400, -3.1273], + [-3.5864, 3.2646], + [-5.2207, 5.1339], + [-6.1434, 5.9958], + [-3.7146, 3.7730], + [-5.4234, 5.4048], + [ 2.3306, -2.6122], + [-4.9518, 4.8220], + [ 4.5392, -4.9607], + [-3.6555, 3.3582], + [-6.3886, 6.3755], + [ 5.4835, -5.7288], + [-4.1048, 3.7762], + [-5.4687, 5.3170], + [ 1.8663, -1.9922], + [-5.7107, 5.6155], + [-6.3452, 6.2378], + [ 4.3601, -4.6942], + [-4.0381, 3.6993], + [-2.7236, 2.3874], + [-6.5377, 6.4596], + [ 4.7388, -4.9618], + [-4.1445, 4.0147], + [-3.2394, 3.0597], + [-6.4798, 6.4951], + [ 3.4776, -3.5738], + [-3.8995, 3.7070], + [-6.2740, 6.1853], + [ 0.5614, -0.6362], + [ 2.2524, -2.6752], + [ 1.2732, -1.3505], + [-5.8915, 5.7498], + [-5.4571, 5.2319], + [-5.3127, 5.2048], + [-5.2430, 5.1641], + [-6.2102, 5.9860], + [-4.7992, 4.6211], + [-5.5375, 5.3835], + [ 2.8780, -3.1837], + [-4.9512, 4.8370], + [-1.0507, 0.7574], + [-5.4784, 5.4218], + [-6.7357, 6.5769], + [ 2.6258, -2.7774], + [-5.3999, 5.3837], + [-6.1618, 6.0641], + [-6.5539, 6.4052], + [-5.7047, 5.4800], + [-5.9583, 5.7362], + [-6.0462, 5.8599], + [-5.5653, 5.4468], + [-3.2844, 3.1273], + [-5.6074, 5.4260], + [ 2.5300, -2.5976], + [ 0.5777, -0.8317], + [ 0.6132, -0.9072], + [ 0.6569, -0.8719], + [ 1.6783, -2.0261], + [ 2.8796, -3.1696], + [-3.9023, 3.7957], + [-5.3278, 5.0901], + [ 2.7020, -3.1465], + [-5.6132, 5.4999], + [-4.9417, 4.7985], + [-1.8972, 1.6548], + [-5.2411, 5.0297], + [-6.7010, 6.6222], + [-2.4601, 2.3636], + [-5.0536, 4.8660], + [-6.8066, 6.6947], + [-5.5768, 5.3394], + [ 2.8493, -3.1505], + [ 2.1578, -2.2780], + [-2.7945, 2.4581], + [ 1.8096, -2.1536], + [ 4.8536, -5.3112], + [-6.5955, 6.5189], + [-5.3050, 5.1755], + [-1.0797, 0.7674], + [ 4.0221, -4.1076], + [-5.8016, 5.5423], + [ 2.3813, -2.7556], + [-5.9712, 5.7764], + [-4.3260, 4.0495], + [ 1.7486, -1.7339], + [-3.1676, 2.9579], + [-2.1937, 1.8397], + [ 3.4994, -3.9541], + [ 1.7120, -2.1627], + [-5.9394, 5.8448], + [ 3.8194, -4.0644], + [-6.0477, 5.9222], + [-6.2928, 6.1914], + [-6.3272, 6.2460], + [-6.3325, 6.2397], + [-6.3505, 6.3380], + [-6.4864, 6.4145], + [-0.3602, 0.1024], + [-2.4344, 2.4202], + [ 3.6166, -3.9446], + [-5.6174, 5.3376], + [-4.6129, 4.5141], + [ 1.2075, -1.6555], + [ 2.0389, -2.2325], + [-2.2212, 1.7691], + [-6.5713, 6.4609], + [-6.4067, 6.2907], + [-3.0928, 3.0259], + [-6.0855, 5.9309], + [ 3.8809, -4.3669], + [ 1.6139, -1.7099], + [ 4.5166, -4.6806], + [-6.3722, 6.2121], + [-4.0291, 3.7882], + [-4.7705, 4.6050], + [ 4.3351, -4.3133], + [ 3.7733, -4.0610], + [-5.1520, 4.9450], + [-6.2179, 6.0689], + [-2.3152, 2.0459], + [-6.5443, 6.3840], + [-6.0921, 5.9201], + [-5.5987, 5.3928], + [ 4.3545, -4.4497], + [-2.6241, 2.3742], + [-5.7945, 5.6997], + [ 4.3511, -4.6238], + [-6.0649, 5.9275], + [ 4.2543, -4.2981], + [-2.5154, 2.2707], + [-2.5161, 2.1183], + [-6.0145, 5.9079], + [ 2.3531, -2.7195], + [ 3.0510, -3.2850], + [-6.4918, 6.4250], + [ 0.8675, -1.1974], + [-3.3242, 3.0893], + [-5.6376, 5.5594], + [ 1.1190, -1.2452], + [ 0.3243, -0.8332], + [ 4.6855, -5.0153], + [ 6.1836, -6.5228], + [ 5.1656, -5.2867], + [-4.5296, 4.2178], + [ 5.0112, -5.2582], + [-6.0914, 5.9777], + [-4.1383, 3.8311], + [-5.7958, 5.7127], + [-6.0971, 5.9261], + [-1.1757, 0.8619], + [-5.4053, 5.3000], + [-6.2923, 6.1649], + [ 0.7009, -1.0186], + [-1.4210, 1.0664], + [-6.3270, 6.1932], + [-1.2537, 1.1562], + [-6.0069, 5.8797], + [-5.6599, 5.6020], + [ 2.3311, -2.4260], + [-5.7985, 5.7553], + [-6.4522, 6.3028], + [-4.9559, 4.8248], + [ 4.0879, -4.2899], + [ 0.7942, -1.0209], + [-5.5883, 5.5024], + [-6.4251, 6.3315], + [-3.5305, 3.1899], + [-6.2622, 6.1444], + [ 0.0996, -0.3755], + [-6.2556, 6.1156], + [ 4.0997, -4.3398], + [-5.7470, 5.4743], + [-3.8332, 3.6157], + [-5.0088, 4.8831], + [ 6.1854, -6.4655], + [-2.1577, 1.9844], + [-5.4153, 5.2443], + [-0.5237, 0.2588], + [-2.3327, 2.0834], + [-6.5066, 6.3763], + [ 2.8637, -3.2300], + [-4.4745, 4.1783], + [-6.5421, 6.4459], + [ 5.5750, -5.8852], + [-6.2609, 6.0258], + [-5.6381, 5.5488], + [-5.0543, 4.8133], + [-4.5764, 4.5048], + [ 2.4264, -2.6103], + [ 3.4640, -3.7113], + [-5.7992, 5.6979], + [-6.5570, 6.4394], + [ 3.3719, -3.7170], + [-0.9757, 0.6231], + [ 1.1199, -1.5381], + [ 0.8814, -1.1955], + [-0.6904, 0.3592], + [ 4.7367, -5.1311], + [-0.5066, 0.2567], + [-6.6573, 6.5252], + [-3.0955, 2.9379], + [-6.3793, 6.2702], + [ 2.1331, -2.3009], + [-5.3317, 5.2110], + [-4.8029, 4.5264], + [ 5.7838, -5.8829], + [-6.2802, 6.1446], + [ 4.5606, -4.8015], + [-5.0533, 4.9458], + [ 4.1380, -4.3886], + [-6.0061, 5.9311], + [-5.3932, 5.1911], + [-5.7829, 5.6079], + [-5.5976, 5.4961], + [-5.6084, 5.5007], + [-3.4526, 3.1738], + [-6.5224, 6.4362], + [-4.7406, 4.6855], + [-3.0675, 2.8289], + [-4.8587, 4.5570], + [ 1.5335, -1.6823], + [-6.3425, 6.1066], + [-1.0500, 0.5476], + [ 5.7313, -5.7412], + [-3.0771, 2.6016], + [-6.3664, 6.2771], + [ 0.8845, -0.9530], + [-3.3043, 3.0352]], device='cuda:0') +06/01/2024 13:02:40 - INFO - __main__ - tensor([[[ 10.7942, 7.8841], + [ 7.8841, 10.9539]], + + [[ 14.3409, -3.5613], + [ -3.5613, 13.2836]], + + [[ 90.4758, -81.7634], + [-81.7635, 87.4052]], + + ..., + + [[ 10.6902, 7.9168], + [ 7.9168, 10.8867]], + + [[ 22.2903, -15.8677], + [-15.8677, 22.0143]], + + [[ 97.1826, -92.1228], + [-92.1228, 107.1599]]], device='cuda:0') +06/01/2024 13:02:40 - INFO - __main__ - ***** Completed training ***** +06/01/2024 13:02:45 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 13:02:45 - INFO - __main__ - ***** Starting script ***** +06/01/2024 13:02:46 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 13:02:47 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/adapter_config.json +06/01/2024 13:02:47 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'mrpc'. +06/01/2024 13:02:47 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_adapter.bin +06/01/2024 13:02:47 - INFO - adapters.loading - Loading module configuration from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/head_config.json +06/01/2024 13:02:47 - INFO - adapters.heads.model_mixin - Adding head 'mrpc' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 13:02:47 - INFO - adapters.loading - Loading module weights from ./outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_model_head.bin +06/01/2024 13:02:47 - INFO - __main__ - Adapter Name = mrpc +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_down.0.bias +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.weight +06/01/2024 13:02:47 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.mrpc.adapter_up.bias +06/01/2024 13:02:47 - INFO - __main__ - heads.mrpc.1.weight +06/01/2024 13:02:47 - INFO - __main__ - heads.mrpc.1.bias +06/01/2024 13:02:47 - INFO - __main__ - heads.mrpc.4.weight +06/01/2024 13:02:47 - INFO - __main__ - heads.mrpc.4.bias +06/01/2024 13:02:54 - INFO - __main__ - Sample 1698 of the training set: {'input_ids': [0, 113, 7908, 2156, 1603, 197, 28, 342, 15, 3120, 14, 1170, 7, 860, 7, 21280, 3345, 11, 49, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2, 2, 113, 1603, 197, 28, 15, 3120, 14, 3881, 7, 21280, 3345, 11, 1603, 128, 29, 2274, 40, 28, 11907, 342, 159, 2156, 22, 37, 26, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 13:02:54 - INFO - __main__ - Sample 1183 of the training set: {'input_ids': [0, 32110, 1637, 21, 5304, 23, 68, 41682, 4, 3248, 1589, 42100, 4, 2466, 41, 10671, 23, 10775, 5050, 2156, 519, 15199, 62, 7, 68, 41821, 4, 1096, 480, 10, 672, 45, 450, 187, 902, 158, 479, 2, 2, 32110, 1637, 21, 5304, 23, 68, 40156, 4, 3506, 1589, 40598, 4, 996, 41, 10671, 23, 321, 1497, 5050, 2156, 519, 385, 26587, 25, 239, 25, 68, 40598, 4, 1244, 480, 10, 672, 45, 450, 187, 902, 564, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 13:02:54 - INFO - __main__ - Sample 1178 of the training set: {'input_ids': [0, 104, 1290, 4, 610, 9153, 8, 3045, 4572, 1981, 30790, 7, 1994, 479, 2, 2, 133, 117, 12, 35624, 58, 12274, 4, 610, 9153, 9, 5517, 8, 3045, 4572, 9, 1261, 479, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 13:05:22 - INFO - __main__ - f_mu shape : torch.Size([408, 2]) +06/01/2024 13:05:22 - INFO - __main__ - f_var shape : torch.Size([408, 2, 2]) +06/01/2024 13:05:22 - INFO - __main__ - tensor([[-7.0287, 6.9556], + [ 5.6864, -6.1212], + [ 0.5206, -0.5153], + [-3.1650, 3.0086], + [ 6.8242, -7.0653], + [-6.9522, 6.8282], + [-3.5284, 3.3449], + [-5.2007, 5.1123], + [-6.7103, 6.6386], + [-6.6970, 6.5781], + [-6.3361, 6.2608], + [ 6.1078, -6.2059], + [ 2.4871, -2.5953], + [-4.9768, 4.8104], + [-6.9060, 6.9090], + [-0.5028, 0.2515], + [-6.9271, 6.8205], + [ 5.3464, -5.4622], + [-6.0254, 5.9391], + [ 1.6643, -1.8697], + [ 4.7027, -4.9844], + [-2.9713, 2.6528], + [ 2.4031, -2.6639], + [-7.1485, 7.1155], + [-5.3810, 5.3015], + [-4.1110, 3.7334], + [ 1.5608, -1.9583], + [-7.0669, 7.0367], + [-3.9070, 3.6065], + [-6.2163, 6.0933], + [ 2.8560, -3.0034], + [-5.9894, 5.9166], + [-4.6401, 4.4385], + [-5.3653, 5.2184], + [-7.1289, 7.0184], + [-1.2589, 0.9047], + [ 6.4446, -6.6135], + [ 4.7515, -4.6419], + [-6.0239, 5.9013], + [-7.1036, 7.0651], + [ 2.3871, -2.5468], + [-6.9732, 6.9039], + [ 5.3201, -5.6068], + [ 5.3929, -5.7120], + [ 1.8640, -2.1702], + [-6.6312, 6.5229], + [-5.6072, 5.4765], + [ 5.5041, -5.7635], + [-7.3614, 7.2647], + [-7.1098, 6.9808], + [-5.7805, 5.5096], + [-6.8174, 6.6314], + [-7.0412, 6.8804], + [-7.0544, 6.9517], + [-4.9859, 4.9052], + [-6.7424, 6.6730], + [-4.0371, 3.8262], + [-6.9825, 6.8933], + [-6.9169, 6.8279], + [-6.3306, 6.2190], + [ 4.2392, -4.6615], + [ 3.0025, -3.0033], + [-6.8447, 6.7693], + [-6.8254, 6.7104], + [-3.4572, 3.1782], + [ 5.1675, -5.1917], + [-7.3939, 7.3351], + [-7.0515, 6.9691], + [ 1.5631, -1.6333], + [-7.1145, 6.9763], + [-6.8648, 6.8202], + [-5.1902, 5.0977], + [-6.3600, 6.1992], + [-6.4617, 6.3032], + [-5.5717, 5.5123], + [-6.9302, 6.8306], + [-6.8239, 6.7976], + [-6.0337, 5.9469], + [-7.1506, 7.0713], + [-6.7501, 6.6196], + [-6.1524, 5.9605], + [-5.2874, 5.2133], + [-7.1075, 6.9370], + [ 5.6326, -5.9281], + [-6.1952, 6.1067], + [-1.7916, 1.4104], + [-6.7949, 6.6048], + [ 2.4929, -2.8573], + [-7.2216, 7.0581], + [-6.8081, 6.6686], + [ 6.7209, -7.0316], + [-6.7750, 6.7867], + [-6.5013, 6.3345], + [ 7.4589, -7.5741], + [-6.5229, 6.4009], + [-6.8486, 6.7807], + [ 4.1682, -4.5589], + [-5.9285, 5.9956], + [-1.2029, 0.9019], + [-7.1407, 6.9582], + [-6.6677, 6.5232], + [ 5.8159, -5.8344], + [-7.1081, 7.0275], + [-7.0835, 6.9756], + [ 6.7777, -6.9665], + [-7.0259, 6.8993], + [ 3.4217, -3.7193], + [ 4.7215, -4.9624], + [ 7.0273, -7.2850], + [-4.5797, 4.2880], + [-2.8170, 2.7028], + [-4.4979, 4.4084], + [-6.4667, 6.3546], + [-6.9350, 6.8635], + [ 4.0030, -4.4003], + [ 2.2110, -2.5130], + [-5.6695, 5.4429], + [-5.7016, 5.5943], + [-6.4742, 6.4746], + [-7.1217, 7.0153], + [-6.0760, 5.9836], + [ 4.5689, -5.0349], + [ 3.2328, -3.5222], + [-5.9915, 5.8393], + [-7.0220, 6.9360], + [-7.1573, 7.1312], + [-7.0781, 6.9605], + [ 2.7541, -3.1611], + [-6.9305, 6.8830], + [-7.0880, 7.0376], + [-3.1669, 3.0808], + [ 1.1198, -1.5111], + [-6.9326, 6.8125], + [ 0.4836, -0.7295], + [-1.3396, 1.0495], + [-6.5782, 6.4140], + [ 6.0529, -6.2516], + [ 4.9873, -5.1382], + [-6.9327, 6.8708], + [-6.5773, 6.4634], + [-6.1188, 6.0310], + [ 4.0656, -4.3317], + [ 4.0953, -4.2882], + [-7.1816, 7.0695], + [ 6.7797, -6.9709], + [-1.7727, 1.5870], + [-6.6648, 6.5392], + [-3.6766, 3.2854], + [-2.6203, 2.2520], + [-6.3153, 6.2720], + [ 6.3107, -6.3005], + [ 0.2629, -0.4888], + [-6.8547, 6.7392], + [-5.8853, 5.7948], + [-3.8640, 3.6896], + [-6.8239, 6.7009], + [-7.2726, 7.0909], + [ 2.4980, -2.6055], + [ 1.5204, -1.9063], + [-6.8048, 6.7329], + [-2.8384, 2.5512], + [-6.7795, 6.6577], + [-6.1893, 6.1266], + [-7.3724, 7.2631], + [-5.5446, 5.2973], + [-7.1371, 7.0340], + [-6.7349, 6.5444], + [ 4.7344, -5.2225], + [-5.8692, 5.7817], + [ 7.0569, -7.2681], + [ 3.1360, -3.3050], + [ 1.4660, -1.7710], + [-0.0241, -0.0260], + [-6.1099, 5.8891], + [ 3.6526, -3.9239], + [-6.7487, 6.5730], + [-7.0031, 6.9239], + [ 5.3234, -5.5824], + [-5.3640, 5.1665], + [-7.0987, 6.9644], + [ 3.2662, -3.6439], + [-4.8503, 4.5934], + [-5.9182, 5.8168], + [-6.8631, 6.7435], + [-4.7714, 4.8609], + [-5.9146, 5.8862], + [ 3.1321, -3.4161], + [-5.9531, 5.8427], + [ 5.8548, -6.2133], + [-4.7279, 4.4766], + [-7.1057, 7.0771], + [ 6.4833, -6.6978], + [-4.8967, 4.5786], + [-6.0690, 5.9217], + [ 3.1292, -3.2924], + [-6.4395, 6.3501], + [-6.9810, 6.8867], + [ 5.7431, -6.0348], + [-5.2363, 4.9450], + [-5.1010, 4.8461], + [-7.1443, 7.0777], + [ 5.7358, -5.9299], + [-4.5414, 4.4194], + [-4.5803, 4.4171], + [-7.1743, 7.1814], + [ 4.2868, -4.3826], + [-4.8229, 4.6555], + [-6.9986, 6.9158], + [ 1.3776, -1.4508], + [ 3.0187, -3.4578], + [ 2.3060, -2.4000], + [-6.7279, 6.6043], + [-6.4252, 6.2399], + [-5.9754, 5.8737], + [-6.3289, 6.2460], + [-7.0578, 6.8737], + [-5.7640, 5.6095], + [-5.9292, 5.7819], + [ 4.2770, -4.5596], + [-6.1270, 6.0206], + [-1.7448, 1.4505], + [-6.4258, 6.3936], + [-7.3437, 7.2022], + [ 3.9061, -4.0237], + [-6.2135, 6.2050], + [-7.0106, 6.9404], + [-7.1580, 7.0345], + [-6.5804, 6.3981], + [-6.7784, 6.6087], + [-6.8636, 6.7050], + [-6.1196, 6.0080], + [-3.8984, 3.7516], + [-6.6894, 6.5436], + [ 3.3799, -3.4596], + [ 2.1734, -2.4621], + [ 0.6306, -0.9406], + [-0.2939, 0.0642], + [ 3.5409, -3.8825], + [ 3.4961, -3.7861], + [-2.7195, 2.5671], + [-6.3475, 6.1609], + [ 3.4893, -3.9308], + [-6.0210, 5.9056], + [-5.8082, 5.6841], + [-3.0942, 2.8729], + [-5.8755, 5.6823], + [-7.3632, 7.2955], + [-2.8146, 2.7135], + [-6.0203, 5.8575], + [-7.2603, 7.1588], + [-6.8136, 6.6581], + [ 4.7758, -4.9889], + [ 2.9836, -3.1125], + [-2.5080, 2.1281], + [ 3.0848, -3.4241], + [ 6.2599, -6.6266], + [-7.0586, 6.9872], + [-6.0013, 5.8884], + [-1.6128, 1.2830], + [ 5.3098, -5.3719], + [-6.7065, 6.4837], + [ 4.5809, -4.9095], + [-6.7964, 6.6149], + [-5.4765, 5.2394], + [ 2.9801, -2.9525], + [-3.5625, 3.3436], + [-3.4086, 3.0564], + [ 4.9599, -5.3849], + [ 2.7883, -3.2467], + [-6.9726, 6.9140], + [ 5.0092, -5.2436], + [-6.8089, 6.7115], + [-7.0096, 6.9262], + [-6.9526, 6.8741], + [-6.9062, 6.8235], + [-7.0710, 7.0615], + [-7.1227, 7.0564], + [ 0.6091, -0.8357], + [-2.6647, 2.6475], + [ 4.5688, -4.8862], + [-6.5328, 6.3069], + [-4.9919, 4.8918], + [ 1.9744, -2.4192], + [ 3.7041, -3.8560], + [-2.8866, 2.4279], + [-7.1408, 7.0396], + [-7.0546, 6.9575], + [-3.9202, 3.8303], + [-6.8524, 6.7235], + [ 5.1026, -5.5493], + [ 2.7965, -2.8699], + [ 6.3790, -6.5001], + [-6.9893, 6.8651], + [-5.5017, 5.2946], + [-5.5642, 5.4247], + [ 5.4130, -5.4100], + [ 5.6482, -5.8842], + [-6.3875, 6.2211], + [-6.9559, 6.8337], + [-2.9942, 2.7230], + [-7.0632, 6.9163], + [-6.8483, 6.7017], + [-6.5615, 6.3946], + [ 5.4765, -5.5828], + [-3.5005, 3.2486], + [-6.3067, 6.2112], + [ 5.5313, -5.7919], + [-6.6475, 6.5359], + [ 5.3367, -5.3645], + [-2.0929, 1.8286], + [-4.1987, 3.8728], + [-6.7972, 6.7067], + [ 2.6250, -2.9667], + [ 4.2487, -4.4489], + [-7.0586, 6.9965], + [ 0.9681, -1.3147], + [-3.9914, 3.7618], + [-6.2387, 6.1640], + [ 1.4617, -1.5817], + [ 0.5604, -1.0665], + [ 5.8215, -6.1208], + [ 7.2067, -7.4843], + [ 6.0189, -6.1119], + [-5.6863, 5.4391], + [ 6.3181, -6.5159], + [-6.4996, 6.3959], + [-4.7130, 4.4189], + [-6.2177, 6.1377], + [-6.9175, 6.7834], + [-1.6985, 1.4165], + [-5.7982, 5.6974], + [-7.0594, 6.9523], + [ 0.6887, -1.0189], + [-1.6891, 1.3225], + [-7.0373, 6.9128], + [-2.6454, 2.5832], + [-6.8997, 6.7985], + [-6.1121, 6.0553], + [ 2.3910, -2.5008], + [-6.7425, 6.7055], + [-7.1138, 6.9836], + [-6.0977, 5.9692], + [ 5.6145, -5.8165], + [ 0.8338, -1.0702], + [-5.9555, 5.8708], + [-7.1244, 7.0451], + [-5.4114, 5.1327], + [-6.6827, 6.5760], + [ 0.5539, -0.8326], + [-6.8922, 6.7759], + [ 5.3748, -5.6011], + [-6.5907, 6.3568], + [-4.2811, 4.0717], + [-5.7806, 5.6581], + [ 7.1048, -7.3711], + [-2.3257, 2.1512], + [-6.4234, 6.2736], + [ 0.0768, -0.3381], + [-3.2988, 3.0404], + [-7.0429, 6.9317], + [ 4.1294, -4.4822], + [-5.6900, 5.4293], + [-7.1226, 7.0299], + [ 6.5557, -6.8252], + [-6.9596, 6.7734], + [-6.0709, 5.9785], + [-6.0693, 5.8804], + [-5.5455, 5.4934], + [ 3.3453, -3.5205], + [ 4.7978, -5.0039], + [-6.4686, 6.3755], + [-7.1492, 7.0491], + [ 4.3421, -4.6656], + [-0.6184, 0.2664], + [ 3.6167, -4.0242], + [ 1.0463, -1.3585], + [-0.1981, -0.1450], + [ 6.0826, -6.4142], + [-0.7680, 0.5078], + [-7.2617, 7.1461], + [-5.2338, 5.0976], + [-6.8375, 6.7363], + [ 3.4630, -3.6132], + [-5.7818, 5.6643], + [-5.6011, 5.3396], + [ 6.7803, -6.8533], + [-7.1897, 7.0901], + [ 5.7381, -5.9533], + [-5.6087, 5.4989], + [ 5.0748, -5.3117], + [-6.8591, 6.8075], + [-6.1277, 5.9516], + [-6.7812, 6.6313], + [-6.0636, 5.9672], + [-6.4636, 6.3769], + [-4.3267, 4.0815], + [-7.0370, 6.9594], + [-5.3519, 5.3013], + [-4.0604, 3.8368], + [-6.1742, 5.9328], + [ 2.1479, -2.2810], + [-6.9240, 6.7229], + [-0.3911, -0.1172], + [ 6.6681, -6.7041], + [-4.1965, 3.7145], + [-7.0063, 6.9308], + [ 1.7724, -1.8304], + [-4.3223, 4.0624]], device='cuda:0') +06/01/2024 13:05:22 - INFO - __main__ - tensor([[[ 17.0987, 12.8737], + [ 12.8737, 17.3398]], + + [[ 41.0728, -16.5073], + [ -16.5073, 35.5610]], + + [[ 160.6691, -147.4137], + [-147.4136, 156.7026]], + + ..., + + [[ 17.0904, 12.7638], + [ 12.7638, 17.3654]], + + [[ 48.6553, -38.3268], + [ -38.3268, 47.9726]], + + [[ 197.6615, -190.0297], + [-190.0297, 219.1508]]], device='cuda:0') +06/01/2024 13:05:22 - INFO - __main__ - ***** Completed training ***** diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..685d48ad9e52f733154d8ff7e9ccb2e5dc7a09bf --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85862ef53cde04fd095621971c40cd6d4aa56cadff8274c52cfddf86adefe180 +size 68 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..b69fd2c49adc7229e89b203011d20edeefe60acf --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1a9346ab14168a80c973e1a5e3b6b0c6f05eb6b62263987fc724c387cf5061d +size 68606 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..f953b1c9d95777f562392959f81da8557db4fd02 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8875908da7a136b6600fcb2d3ffe61c9a5a8a14a8a36d535486ed9ff8f8c3638 +size 6120 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..20bb6ce641e763317e792a16bfdc4a374c42799f --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe9f2bec627fac5d0b50392516d59184df547f09babf0e417e119c429e05f08 +size 68 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd5fccbd098123b143d046cb2d5d623948d446a --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78135c57fba98a9328bb1737b3062ac46ab32fdffb55fad82ef0fa5b729fa354 +size 69312 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..3896df750d1ce238e81c296ef800ec8c56a7a8b8 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e3619ce929c748a3e942288b03a8049ad12664e6dc2539b41f78c11773fd28 +size 6135 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..223f5cec986a6d383b8e82339792787a258d7025 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1683d8b9f46dedb78e1f52b1461afd9ca4fce40efc968789f3349f99200edf7b +size 68 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..99a136fd0ae8159aff735821d7d41d399b5102d7 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ab816f2f13ecc0b1092f1bd232250458c175b0b2b1be4d215087dc87484083 +size 69609 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..b0f1d448404f072bd9ba94e709d621edeb3592a3 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e954435a28a22d98c309e35890e95341ac6ebe05e1fd4239aa73917ddb6828ba +size 6140 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..7ba8e82392fc000888552151ef73044da9cd85fe --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea24cf08b332ef63661e79349c2726d91a13c88b123de1f1a9f45d0fd0efb678 +size 68 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..007878ad29e75ecad0975d1c5a50f9d56d5bb703 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01440458407852c94ec2911403408fb46d8b32653ceebe3521c392bb941ea9c +size 69605 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..583fee05c61a324f68291413679fcea4dbf34f4b --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb9450b874031afbccdb0f88c8b64592c0b48fa8c4e5012eeb1512d3317443f +size 6141 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..4f45f7e9a3fe591d222c78f2fda13d3d04c13456 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ca717ae0c92c0c36e0b69043c2a58c826f15aa4a50c648241e966d674dc939 +size 68 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..6fece635c0d3246db9f3b3f008dd8d4f6f0a0231 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0c48326f7c9ad696851403ee3ddd3d665965af0927486ce3ffdf6d2bb4f1ea +size 69520 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..67270d29424781924332e93debd76b2d9abfe869 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac3ab9a5bc1e08c5e762f46ca33fb2b9dcbb37c7bdaa5a7128f2caa1fbce17b +size 6158 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..292da0016e37fbe0173071a083e581b947fd1442 --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e9cb38fa598e2a2fadbda0b65b3399620b47e889415dd194b51d72b12c739c +size 67 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..ccd999816cc1eb45fea16f60631b6fb5a62bb8fe --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6abb8bb1807411171defbca338a07fdf00fb51969dd519c1765c6dc0eca20c8a +size 69452 diff --git a/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..887f1ac753f332f23a55aca54d0e30c6784bd1fc --- /dev/null +++ b/outputs/mrpc/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56065fb4c2b702f88ea2353a999e34dada18cab1e7634c2e746b5bf8e3887722 +size 6166 diff --git a/outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..1482b32550923ce434a65aae62c8e730fa728be6 --- /dev/null +++ b/outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,113 @@ +06/01/2024 12:29:28 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:29:28 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:29:29 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:29:30 - INFO - adapters.loading - Loading module configuration from ./outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 12:29:30 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'rte'. +06/01/2024 12:29:30 - INFO - adapters.loading - Loading module weights from ./outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 12:29:30 - INFO - adapters.loading - Loading module configuration from ./outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 12:29:30 - INFO - adapters.heads.model_mixin - Adding head 'rte' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:29:30 - INFO - adapters.loading - Loading module weights from ./outputs/rte/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 12:29:30 - INFO - __main__ - Adapter Name = rte +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.rte.adapter_down.0.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.rte.adapter_down.0.bias +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.rte.adapter_up.weight +06/01/2024 12:29:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.rte.adapter_up.bias +06/01/2024 12:29:30 - INFO - __main__ - heads.rte.1.weight +06/01/2024 12:29:30 - INFO - __main__ - heads.rte.1.bias +06/01/2024 12:29:30 - INFO - __main__ - heads.rte.4.weight +06/01/2024 12:29:30 - INFO - __main__ - heads.rte.4.bias +06/01/2024 12:29:39 - INFO - __main__ - Sample 849 of the training set: {'input_ids': [101, 6964, 1010, 3438, 3867, 1997, 1996, 9735, 2853, 2005, 1037, 2644, 2006, 1996, 21994, 25618, 2778, 1010, 2005, 2742, 1010, 5258, 2416, 2420, 3188, 2000, 1996, 2265, 1012, 102, 2087, 7562, 2069, 5271, 3438, 3867, 1997, 1996, 9735, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:29:39 - INFO - __main__ - Sample 591 of the training set: {'input_ids': [101, 2047, 3414, 1005, 1055, 15644, 3691, 2056, 5958, 2008, 2009, 2145, 6118, 28085, 2015, 2493, 2013, 2478, 2505, 2060, 2084, 2440, 2394, 1010, 2021, 2008, 4923, 2097, 2022, 2445, 2065, 1996, 3437, 1000, 4415, 3065, 1996, 3223, 4824, 1010, 1000, 2130, 2065, 2009, 3397, 3793, 1011, 3713, 1012, 102, 2440, 2394, 2003, 6749, 2011, 2047, 3414, 1005, 1055, 15644, 3691, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:29:39 - INFO - __main__ - Sample 589 of the training set: {'input_ids': [101, 2630, 3137, 13891, 2003, 1037, 7506, 1997, 11843, 13116, 9099, 25434, 3840, 1010, 9413, 3619, 14919, 2028, 1012, 102, 2630, 3137, 13891, 8617, 9413, 3619, 14919, 2028, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..82d35d5b1d9a1df3f08b13e8de4707c9bc013d8b --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,852 @@ +06/01/2024 13:06:47 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 13:06:47 - INFO - __main__ - ***** Starting script ***** +06/01/2024 13:06:49 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 13:06:50 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 13:06:50 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'. +06/01/2024 13:06:50 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 13:06:50 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 13:06:50 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 13:06:50 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 13:06:50 - INFO - __main__ - Adapter Name = sst2 +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight +06/01/2024 13:06:50 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias +06/01/2024 13:06:50 - INFO - __main__ - heads.sst2.1.weight +06/01/2024 13:06:50 - INFO - __main__ - heads.sst2.1.bias +06/01/2024 13:06:50 - INFO - __main__ - heads.sst2.4.weight +06/01/2024 13:06:50 - INFO - __main__ - heads.sst2.4.bias +06/01/2024 13:07:03 - INFO - __main__ - Sample 27180 of the training set: {'input_ids': [0, 31313, 110, 471, 15, 5, 2418, 11, 760, 9, 47, 2156, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 13:07:03 - INFO - __main__ - Sample 18929 of the training set: {'input_ids': [0, 21680, 5564, 12308, 4187, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 13:07:03 - INFO - __main__ - Sample 18854 of the training set: {'input_ids': [0, 354, 543, 7, 1137, 54, 16, 11277, 54, 50, 596, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 13:37:30 - INFO - __main__ - f_mu shape : torch.Size([872, 2]) +06/01/2024 13:37:30 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2]) +06/01/2024 13:37:30 - INFO - __main__ - tensor([[-0.1175, 0.0181], + [-0.0932, 0.0212], + [-0.0826, 0.1354], + ..., + [-0.1280, 0.0850], + [-0.1668, -0.0445], + [-0.0796, 0.1498]], device='cuda:0') +06/01/2024 13:37:30 - INFO - __main__ - tensor([[[3.7302, 3.6951], + [3.6951, 3.7307]], + + [[3.9195, 3.8845], + [3.8845, 3.9188]], + + [[3.4971, 3.4915], + [3.4915, 3.4952]], + + ..., + + [[3.8639, 3.8249], + [3.8249, 3.8717]], + + [[4.3521, 4.2403], + [4.2403, 4.3567]], + + [[3.6967, 3.6904], + [3.6904, 3.6948]]], device='cuda:0') +06/01/2024 13:37:30 - INFO - __main__ - ***** Completed training ***** +06/01/2024 13:37:35 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 13:37:35 - INFO - __main__ - ***** Starting script ***** +06/01/2024 13:37:36 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 13:37:37 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/adapter_config.json +06/01/2024 13:37:37 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'. +06/01/2024 13:37:37 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_adapter.bin +06/01/2024 13:37:37 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/head_config.json +06/01/2024 13:37:37 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 13:37:37 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_model_head.bin +06/01/2024 13:37:37 - INFO - __main__ - Adapter Name = sst2 +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight +06/01/2024 13:37:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias +06/01/2024 13:37:37 - INFO - __main__ - heads.sst2.1.weight +06/01/2024 13:37:37 - INFO - __main__ - heads.sst2.1.bias +06/01/2024 13:37:37 - INFO - __main__ - heads.sst2.4.weight +06/01/2024 13:37:37 - INFO - __main__ - heads.sst2.4.bias +06/01/2024 13:37:51 - INFO - __main__ - Sample 27180 of the training set: {'input_ids': [0, 31313, 110, 471, 15, 5, 2418, 11, 760, 9, 47, 2156, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 13:37:51 - INFO - __main__ - Sample 18929 of the training set: {'input_ids': [0, 21680, 5564, 12308, 4187, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 13:37:51 - INFO - __main__ - Sample 18854 of the training set: {'input_ids': [0, 354, 543, 7, 1137, 54, 16, 11277, 54, 50, 596, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 14:09:47 - INFO - __main__ - f_mu shape : torch.Size([872, 2]) +06/01/2024 14:09:47 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2]) +06/01/2024 14:09:47 - INFO - __main__ - tensor([[-4.3516, 4.3793], + [ 1.4191, -1.5803], + [-2.1876, 2.1257], + ..., + [ 1.9708, -2.1873], + [ 2.2869, -2.5290], + [-1.9137, 1.7891]], device='cuda:0') +06/01/2024 14:09:47 - INFO - __main__ - tensor([[[3.0858, 2.6685], + [2.6685, 3.0816]], + + [[1.7477, 0.7704], + [0.7704, 1.7841]], + + [[1.6739, 1.0564], + [1.0564, 1.6753]], + + ..., + + [[1.8996, 1.0167], + [1.0167, 1.9072]], + + [[2.3364, 0.7846], + [0.7846, 2.3533]], + + [[2.6514, 0.5887], + [0.5887, 2.6427]]], device='cuda:0') +06/01/2024 14:09:47 - INFO - __main__ - ***** Completed training ***** +06/01/2024 14:09:51 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 14:09:51 - INFO - __main__ - ***** Starting script ***** +06/01/2024 14:09:53 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 14:09:54 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/adapter_config.json +06/01/2024 14:09:54 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'. +06/01/2024 14:09:54 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_adapter.bin +06/01/2024 14:09:54 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/head_config.json +06/01/2024 14:09:54 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 14:09:54 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_model_head.bin +06/01/2024 14:09:54 - INFO - __main__ - Adapter Name = sst2 +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight +06/01/2024 14:09:54 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias +06/01/2024 14:09:54 - INFO - __main__ - heads.sst2.1.weight +06/01/2024 14:09:54 - INFO - __main__ - heads.sst2.1.bias +06/01/2024 14:09:54 - INFO - __main__ - heads.sst2.4.weight +06/01/2024 14:09:54 - INFO - __main__ - heads.sst2.4.bias +06/01/2024 14:10:08 - INFO - __main__ - Sample 27180 of the training set: {'input_ids': [0, 31313, 110, 471, 15, 5, 2418, 11, 760, 9, 47, 2156, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 14:10:08 - INFO - __main__ - Sample 18929 of the training set: {'input_ids': [0, 21680, 5564, 12308, 4187, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 14:10:08 - INFO - __main__ - Sample 18854 of the training set: {'input_ids': [0, 354, 543, 7, 1137, 54, 16, 11277, 54, 50, 596, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 14:41:50 - INFO - __main__ - f_mu shape : torch.Size([872, 2]) +06/01/2024 14:41:50 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2]) +06/01/2024 14:41:50 - INFO - __main__ - tensor([[-3.4851, 3.4706], + [ 1.8035, -2.1720], + [-2.0108, 1.9059], + ..., + [ 1.8282, -2.1493], + [ 2.4943, -2.8568], + [-1.8243, 1.6598]], device='cuda:0') +06/01/2024 14:41:50 - INFO - __main__ - tensor([[[3.0886, 2.8256], + [2.8256, 3.0867]], + + [[2.0868, 1.3034], + [1.3034, 2.1473]], + + [[1.8606, 1.3018], + [1.3018, 1.8694]], + + ..., + + [[2.2732, 1.2384], + [1.2384, 2.3072]], + + [[2.6797, 1.9473], + [1.9473, 2.7070]], + + [[2.5788, 1.2291], + [1.2291, 2.5951]]], device='cuda:0') +06/01/2024 14:41:50 - INFO - __main__ - ***** Completed training ***** +06/01/2024 14:41:54 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 14:41:54 - INFO - __main__ - ***** Starting script ***** +06/01/2024 14:41:56 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 14:41:57 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/adapter_config.json +06/01/2024 14:41:57 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'. +06/01/2024 14:41:57 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_adapter.bin +06/01/2024 14:41:57 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/head_config.json +06/01/2024 14:41:57 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 14:41:57 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_model_head.bin +06/01/2024 14:41:57 - INFO - __main__ - Adapter Name = sst2 +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:57 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:58 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:58 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:58 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight +06/01/2024 14:41:58 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias +06/01/2024 14:41:58 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight +06/01/2024 14:41:58 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias +06/01/2024 14:41:58 - INFO - __main__ - heads.sst2.1.weight +06/01/2024 14:41:58 - INFO - __main__ - heads.sst2.1.bias +06/01/2024 14:41:58 - INFO - __main__ - heads.sst2.4.weight +06/01/2024 14:41:58 - INFO - __main__ - heads.sst2.4.bias +06/01/2024 14:42:11 - INFO - __main__ - Sample 27180 of the training set: {'input_ids': [0, 31313, 110, 471, 15, 5, 2418, 11, 760, 9, 47, 2156, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 14:42:11 - INFO - __main__ - Sample 18929 of the training set: {'input_ids': [0, 21680, 5564, 12308, 4187, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 14:42:11 - INFO - __main__ - Sample 18854 of the training set: {'input_ids': [0, 354, 543, 7, 1137, 54, 16, 11277, 54, 50, 596, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 15:14:12 - INFO - __main__ - f_mu shape : torch.Size([872, 2]) +06/01/2024 15:14:12 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2]) +06/01/2024 15:14:12 - INFO - __main__ - tensor([[-3.7316, 3.6685], + [ 1.1468, -1.5242], + [-2.6522, 2.5532], + ..., + [ 1.2350, -1.5590], + [ 1.5950, -2.0001], + [-2.4371, 2.2927]], device='cuda:0') +06/01/2024 15:14:12 - INFO - __main__ - tensor([[[3.2525, 2.9948], + [2.9948, 3.2480]], + + [[1.6816, 1.1446], + [1.1446, 1.7518]], + + [[2.2956, 1.7450], + [1.7450, 2.3015]], + + ..., + + [[1.9268, 1.1819], + [1.1819, 1.9669]], + + [[2.2516, 1.3688], + [1.3688, 2.3541]], + + [[2.7138, 1.8654], + [1.8654, 2.7105]]], device='cuda:0') +06/01/2024 15:14:12 - INFO - __main__ - ***** Completed training ***** +06/01/2024 15:14:17 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 15:14:17 - INFO - __main__ - ***** Starting script ***** +06/01/2024 15:14:18 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 15:14:19 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/adapter_config.json +06/01/2024 15:14:19 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'. +06/01/2024 15:14:19 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_adapter.bin +06/01/2024 15:14:19 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/head_config.json +06/01/2024 15:14:19 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 15:14:19 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_model_head.bin +06/01/2024 15:14:19 - INFO - __main__ - Adapter Name = sst2 +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight +06/01/2024 15:14:19 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias +06/01/2024 15:14:19 - INFO - __main__ - heads.sst2.1.weight +06/01/2024 15:14:19 - INFO - __main__ - heads.sst2.1.bias +06/01/2024 15:14:19 - INFO - __main__ - heads.sst2.4.weight +06/01/2024 15:14:19 - INFO - __main__ - heads.sst2.4.bias +06/01/2024 15:14:33 - INFO - __main__ - Sample 27180 of the training set: {'input_ids': [0, 31313, 110, 471, 15, 5, 2418, 11, 760, 9, 47, 2156, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 15:14:33 - INFO - __main__ - Sample 18929 of the training set: {'input_ids': [0, 21680, 5564, 12308, 4187, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 15:14:33 - INFO - __main__ - Sample 18854 of the training set: {'input_ids': [0, 354, 543, 7, 1137, 54, 16, 11277, 54, 50, 596, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 15:47:17 - INFO - __main__ - f_mu shape : torch.Size([872, 2]) +06/01/2024 15:47:17 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2]) +06/01/2024 15:47:17 - INFO - __main__ - tensor([[-4.3747, 4.2525], + [ 1.4266, -1.7934], + [-3.2415, 3.1081], + ..., + [ 1.8102, -2.1354], + [ 2.4906, -2.9155], + [-3.0739, 2.8752]], device='cuda:0') +06/01/2024 15:47:17 - INFO - __main__ - tensor([[[3.2600, 3.0213], + [3.0213, 3.2581]], + + [[1.8684, 0.9023], + [0.9023, 1.9562]], + + [[2.5688, 1.9142], + [1.9142, 2.5736]], + + ..., + + [[2.2494, 1.2257], + [1.2257, 2.2900]], + + [[2.6328, 2.0371], + [2.0371, 2.6702]], + + [[2.9172, 1.9559], + [1.9559, 2.9137]]], device='cuda:0') +06/01/2024 15:47:17 - INFO - __main__ - ***** Completed training ***** +06/01/2024 15:47:21 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 15:47:21 - INFO - __main__ - ***** Starting script ***** +06/01/2024 15:47:23 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 15:47:24 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/adapter_config.json +06/01/2024 15:47:24 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'. +06/01/2024 15:47:24 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_adapter.bin +06/01/2024 15:47:24 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/head_config.json +06/01/2024 15:47:24 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 15:47:24 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_model_head.bin +06/01/2024 15:47:24 - INFO - __main__ - Adapter Name = sst2 +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:24 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight +06/01/2024 15:47:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias +06/01/2024 15:47:25 - INFO - __main__ - heads.sst2.1.weight +06/01/2024 15:47:25 - INFO - __main__ - heads.sst2.1.bias +06/01/2024 15:47:25 - INFO - __main__ - heads.sst2.4.weight +06/01/2024 15:47:25 - INFO - __main__ - heads.sst2.4.bias +06/01/2024 15:47:39 - INFO - __main__ - Sample 27180 of the training set: {'input_ids': [0, 31313, 110, 471, 15, 5, 2418, 11, 760, 9, 47, 2156, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 15:47:39 - INFO - __main__ - Sample 18929 of the training set: {'input_ids': [0, 21680, 5564, 12308, 4187, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 15:47:39 - INFO - __main__ - Sample 18854 of the training set: {'input_ids': [0, 354, 543, 7, 1137, 54, 16, 11277, 54, 50, 596, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 16:20:13 - INFO - __main__ - f_mu shape : torch.Size([872, 2]) +06/01/2024 16:20:13 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2]) +06/01/2024 16:20:13 - INFO - __main__ - tensor([[-4.1001, 3.9933], + [ 1.1518, -1.5043], + [-3.0729, 2.9452], + ..., + [ 1.7970, -2.1329], + [ 2.1996, -2.6129], + [-2.8836, 2.6891]], device='cuda:0') +06/01/2024 16:20:14 - INFO - __main__ - tensor([[[3.2955, 3.0520], + [3.0520, 3.2953]], + + [[1.7170, 0.8602], + [0.8602, 1.8046]], + + [[2.6859, 2.0362], + [2.0362, 2.6918]], + + ..., + + [[2.2505, 1.3622], + [1.3622, 2.3027]], + + [[2.6141, 1.6050], + [1.6050, 2.7073]], + + [[3.0271, 2.1365], + [2.1365, 3.0267]]], device='cuda:0') +06/01/2024 16:20:14 - INFO - __main__ - ***** Completed training ***** diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..1457d6a955dc13dd94934d22ba62a2365646ed68 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b5a1e7afb1b704e9205cf83bcadd3cac66326d39c7a0891e9129a5e049ab57 +size 37 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8f33f8145e681dd56ab61bcc1f77407941e2c7 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b86dcfe991c51823f4875de03bd9f43cbed6bf2ab956aaa657d275cfed11e9 +size 146699 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..544bf8e787c3b4ddf712802c18d2a5b1b13531cd --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8afa112c8cbb83749459c233dae94b16ec6c0b6eb9a207906ffe6d56699c6c6 +size 6172 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..6f86d7f2eb7bd621fb09209cab38807956e408cf --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c310907632740010960e9baa6985373acebf3a9a4702c841ebc9d8422d3a62ea +size 37 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa0df5b18fd57b3750ca30e9c85f6421bfaeb0e --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4128219fa5b179873f87b160f78b75c1577d1dbc29d2f1b6f987a91ab219b7b +size 149116 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..878ebf327438636e8fd857de65aca6d2376b402c --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600a285b78927b0e914ae951ea34ef0ba2d220f969d26c3ab251d73584b6c5d6 +size 6185 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..817a3e08d1c2acb6ddf0cf7aceb20f293e3befce --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44562ee2887055840a930f83cdd82d0400a2b84168eced49dc5193488cb7983f +size 37 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..bfaedec2b14cb9a9eb8a3ea9a7de920f3b4b3063 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dccd43794601f5e8faca7f980aa32b5691df0a6489b4e4b7404b595fc31f1899 +size 148956 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..5e993f9f7fb9b5290d572d6216e1f1701c7830ab --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b5b41405d41bd6b28cd65808487a7bbeee3842c08e47db25de40b0816a3a9d +size 6198 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..3b2a29a5d4b3f0ab0acc4b9e43f5dae3068911e7 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36bf92e55b36367662823996dc0ac803f6195964c62cef9700b285fb3af18ead +size 37 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..3128f338c60af69f577e65a3e48b2a96fa84c936 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711b8ebdb2ab9980f9d94c875685f2234a63bf510d0eda272652a5795385f4b1 +size 148745 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..3be8edae02ef97bd9348ede80e3ecf146d9c714f --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098af54073d7cc60b508b7030eedb16e16bd74cfceb6ff3bcd5ea495ba2ffbbe +size 6206 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..fc979fd402e9386047a6e0c42fa955bc25b4ad38 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cba29bbc7a0d82e22a401747b0dadff0aae94d9994c48d2212c73c9a9337ac5 +size 37 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..ba2dbedda5078c87e1f203b206d1bb2bc0afc6aa --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d34d9c635e58371078c519723c2ede273623bded46d04fa089e22546b12370 +size 149491 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..0eb023af8166a91ecfc0a5da6bc3cbc3c0f30b9c --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cdd0b0848e9526eff281ef525c0550c55aa63bb9d07fc61d1176689c0dea389 +size 6207 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..fc979fd402e9386047a6e0c42fa955bc25b4ad38 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cba29bbc7a0d82e22a401747b0dadff0aae94d9994c48d2212c73c9a9337ac5 +size 37 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..cffc5177a99a42d405d8a3370d5344b4664fd162 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc68c5cf97bc1b4b66f37f6f6bfe10280edf807f0c07965302db46b6bd7b71bf +size 149226 diff --git a/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..1703d04d430ac6f465178dd98037e6516a88fe02 --- /dev/null +++ b/outputs/sst2/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0fac2770975788497c45ed249e561f952d6be8a915728f668f6a53a8cb19ee9 +size 6209 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..1793d39bb102ec25a8a87fda306fa6fb0a29a783 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,2394 @@ +06/01/2024 12:34:57 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:34:57 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:34:57 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:34:58 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 12:34:58 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:34:58 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 12:34:58 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 12:34:58 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:34:58 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 12:34:58 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:34:58 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:34:58 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:34:58 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:34:58 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:34:58 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:35:00 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [101, 1996, 2103, 2473, 3549, 4188, 1996, 28337, 1037, 9146, 2138, 2027, 8615, 4808, 1012, 102, 1996, 2103, 2473, 3549, 8615, 4808, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:35:00 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [101, 3021, 2979, 1996, 2208, 11097, 2000, 2198, 2138, 2010, 2735, 2001, 2279, 1012, 102, 3021, 1005, 1055, 2735, 2001, 2279, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:35:00 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [101, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 2010, 2608, 1012, 102, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 1996, 2879, 1005, 1055, 2608, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:35:43 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:35:43 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:35:43 - INFO - __main__ - tensor([[ 0.0009, -0.1178], + [ 0.0137, -0.1045], + [ 0.0336, -0.0181], + [ 0.1571, 0.0669], + [ 0.0134, -0.0637], + [-0.0273, -0.0778], + [ 0.0777, -0.0325], + [-0.0298, -0.1429], + [ 0.0214, -0.1161], + [ 0.0999, -0.0541], + [ 0.0522, -0.0805], + [-0.0610, -0.0761], + [ 0.0097, -0.0040], + [ 0.0204, -0.1141], + [-0.0330, -0.0111], + [ 0.0055, -0.0121], + [ 0.0530, -0.1127], + [-0.0469, -0.1002], + [ 0.0261, 0.0023], + [ 0.0234, -0.0093], + [ 0.0847, -0.0841], + [-0.0099, -0.1366], + [ 0.0186, -0.0539], + [ 0.0437, -0.0155], + [ 0.0027, -0.0885], + [-0.0119, 0.0007], + [-0.0912, -0.0408], + [ 0.1159, -0.0787], + [ 0.0683, -0.0777], + [ 0.0097, -0.0561], + [-0.0614, -0.1025], + [ 0.1040, -0.0798], + [-0.0291, -0.0361], + [-0.0269, -0.1369], + [ 0.0086, -0.0127], + [ 0.0326, -0.0550], + [ 0.0535, -0.1430], + [ 0.0414, -0.0958], + [ 0.0835, -0.1159], + [-0.0300, -0.1298], + [-0.0008, -0.1613], + [-0.0592, -0.0650], + [ 0.0419, -0.0324], + [-0.0165, 0.0621], + [-0.0685, -0.1365], + [-0.0178, 0.0526], + [ 0.0307, -0.1741], + [ 0.0391, -0.0901], + [ 0.0233, -0.0470], + [ 0.0255, -0.0772], + [ 0.0167, -0.1553], + [ 0.0936, -0.0876], + [-0.0391, -0.1378], + [ 0.0415, 0.0272], + [ 0.0158, -0.0764], + [-0.0003, -0.0143], + [-0.0116, -0.0280], + [-0.0647, -0.0682], + [ 0.0578, -0.0858], + [ 0.0459, -0.1240], + [-0.0253, -0.0258], + [ 0.0153, -0.0823], + [ 0.0414, -0.0357], + [ 0.1209, -0.0821], + [ 0.1170, -0.0057], + [-0.0534, -0.0735], + [-0.0829, -0.0692], + [ 0.1419, -0.0606], + [ 0.0266, -0.0446], + [ 0.0014, -0.0635], + [ 0.0454, -0.0585]], device='cuda:0') +06/01/2024 12:35:43 - INFO - __main__ - tensor([[[29.9453, 28.0008], + [28.0008, 29.9612]], + + [[31.9930, 30.4328], + [30.4328, 31.9217]], + + [[32.4100, 28.8610], + [28.8610, 32.3692]], + + [[34.2691, 31.1135], + [31.1135, 34.3078]], + + [[29.3117, 28.2267], + [28.2267, 29.4247]], + + [[31.1508, 29.8484], + [29.8484, 31.2277]], + + [[33.1731, 31.0573], + [31.0573, 33.2641]], + + [[31.8011, 30.3085], + [30.3085, 31.8268]], + + [[32.6860, 31.3024], + [31.3024, 32.7173]], + + [[34.6994, 33.1062], + [33.1062, 34.5816]], + + [[33.0086, 31.4930], + [31.4930, 33.0252]], + + [[29.7873, 28.3350], + [28.3350, 29.7268]], + + [[32.4113, 30.1537], + [30.1537, 32.3876]], + + [[33.8808, 32.2902], + [32.2902, 34.0239]], + + [[34.5134, 31.8399], + [31.8399, 34.5884]], + + [[31.8221, 30.2628], + [30.2628, 31.7491]], + + [[30.7132, 29.4734], + [29.4734, 30.6686]], + + [[32.5729, 31.4670], + [31.4670, 32.6599]], + + [[32.1784, 30.4507], + [30.4507, 32.2386]], + + [[33.3475, 31.4382], + [31.4382, 33.2616]], + + [[35.5875, 34.2542], + [34.2542, 35.7654]], + + [[30.3079, 28.9107], + [28.9107, 30.2537]], + + [[29.9143, 28.0365], + [28.0365, 29.7823]], + + [[31.9327, 30.5564], + [30.5564, 32.0150]], + + [[31.0334, 29.3552], + [29.3552, 31.1150]], + + [[33.7400, 32.0287], + [32.0287, 33.9532]], + + [[32.2973, 30.0596], + [30.0596, 32.2582]], + + [[32.5141, 30.8800], + [30.8800, 32.4706]], + + [[33.0415, 31.3659], + [31.3659, 33.0050]], + + [[31.1263, 29.2315], + [29.2315, 31.0007]], + + [[31.8435, 30.0219], + [30.0219, 31.8692]], + + [[35.8335, 34.1897], + [34.1897, 36.0373]], + + [[30.2131, 27.6923], + [27.6923, 30.1202]], + + [[31.3917, 29.7678], + [29.7678, 31.4083]], + + [[32.0807, 30.5958], + [30.5958, 32.2076]], + + [[27.6689, 25.2366], + [25.2366, 27.5944]], + + [[34.1641, 32.6990], + [32.6990, 34.1667]], + + [[32.6843, 31.1026], + [31.1026, 32.6694]], + + [[34.4840, 32.6197], + [32.6197, 34.3159]], + + [[30.8685, 28.0930], + [28.0930, 30.8473]], + + [[31.9206, 29.2273], + [29.2273, 31.9033]], + + [[29.0860, 27.4096], + [27.4096, 28.9955]], + + [[32.5905, 31.1279], + [31.1279, 32.6408]], + + [[30.1682, 28.3406], + [28.3406, 30.1283]], + + [[31.8016, 30.7313], + [30.7313, 31.7226]], + + [[34.3744, 32.2056], + [32.2056, 34.1356]], + + [[31.3049, 30.0175], + [30.0175, 31.1757]], + + [[33.4755, 32.1923], + [32.1923, 33.6228]], + + [[31.4676, 30.2409], + [30.2409, 31.6243]], + + [[32.9486, 30.9037], + [30.9037, 33.0549]], + + [[31.0879, 29.8016], + [29.8016, 30.9500]], + + [[34.6100, 32.8657], + [32.8657, 34.6324]], + + [[33.3696, 32.1388], + [32.1388, 33.5221]], + + [[34.7734, 32.9788], + [32.9788, 34.8690]], + + [[29.3649, 27.6221], + [27.6221, 29.3873]], + + [[32.7280, 30.8288], + [30.8288, 32.7645]], + + [[32.2641, 30.9368], + [30.9368, 32.3248]], + + [[31.9294, 30.0858], + [30.0858, 31.9307]], + + [[33.1870, 31.4074], + [31.4074, 33.1754]], + + [[32.8930, 31.1964], + [31.1964, 33.1805]], + + [[32.8349, 30.7036], + [30.7035, 32.8285]], + + [[29.9355, 28.3801], + [28.3801, 29.9828]], + + [[32.8626, 31.2322], + [31.2322, 32.6913]], + + [[34.5132, 33.2150], + [33.2150, 34.4167]], + + [[33.4594, 32.3431], + [32.3431, 33.5575]], + + [[32.3784, 31.0880], + [31.0880, 32.4027]], + + [[28.5404, 26.6220], + [26.6220, 28.6154]], + + [[34.1343, 32.5193], + [32.5192, 34.0406]], + + [[30.8274, 29.8379], + [29.8379, 30.9665]], + + [[31.9420, 30.4030], + [30.4030, 32.1193]], + + [[31.7083, 30.0662], + [30.0662, 31.5556]]], device='cuda:0') +06/01/2024 12:35:43 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:35:48 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:35:48 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:35:49 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:35:50 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/adapter_config.json +06/01/2024 12:35:50 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:35:50 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_adapter.bin +06/01/2024 12:35:50 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/head_config.json +06/01/2024 12:35:50 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:35:50 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_model_head.bin +06/01/2024 12:35:50 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:35:50 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:35:50 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:35:50 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:35:50 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:35:50 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:35:52 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [101, 1996, 2103, 2473, 3549, 4188, 1996, 28337, 1037, 9146, 2138, 2027, 8615, 4808, 1012, 102, 1996, 2103, 2473, 3549, 8615, 4808, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:35:52 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [101, 3021, 2979, 1996, 2208, 11097, 2000, 2198, 2138, 2010, 2735, 2001, 2279, 1012, 102, 3021, 1005, 1055, 2735, 2001, 2279, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:35:52 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [101, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 2010, 2608, 1012, 102, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 1996, 2879, 1005, 1055, 2608, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:36:34 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:36:34 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:36:34 - INFO - __main__ - tensor([[-0.3466, 0.2642], + [ 0.3451, -0.4818], + [ 0.6060, -0.7521], + [ 1.0716, -0.6286], + [ 1.2631, -0.9402], + [-1.5742, 1.3780], + [ 0.3164, -0.2045], + [ 1.4190, -1.3733], + [-0.4361, 0.4041], + [ 0.1318, -0.0037], + [-0.6620, 0.4645], + [ 0.3001, -0.2438], + [-0.9180, 0.8620], + [-1.7977, 1.7829], + [-0.5891, 0.5598], + [ 0.1925, -0.0392], + [ 0.9910, -1.0019], + [-0.7173, 0.7256], + [-1.1374, 1.1603], + [-0.0218, 0.1684], + [-0.3444, 0.4968], + [-2.2754, 1.8980], + [-0.6768, 0.7215], + [ 0.9747, -0.8786], + [-1.1926, 1.0064], + [ 0.3375, -0.3595], + [ 0.4871, -0.5155], + [ 0.1844, -0.0119], + [ 0.1127, -0.2056], + [-0.6621, 0.6918], + [ 1.4000, -1.3188], + [-1.3792, 1.4977], + [ 0.2070, -0.2011], + [ 1.1654, -1.1391], + [-1.6938, 1.6782], + [ 0.8454, -0.6875], + [-2.0858, 2.0762], + [-0.0929, 0.0742], + [-0.2762, 0.2878], + [-0.4807, 0.2922], + [-0.5598, 0.2624], + [-0.7193, 0.5475], + [-0.6092, 0.6738], + [ 0.7165, -0.5752], + [-0.5504, 0.4121], + [-0.2181, 0.3402], + [ 0.2717, -0.4269], + [-1.4694, 1.6280], + [ 0.7407, -0.7489], + [ 0.0738, -0.0925], + [ 0.1208, -0.3810], + [-0.3948, 0.4900], + [-0.6471, 0.4959], + [ 0.0033, 0.0566], + [ 1.3915, -1.0398], + [-1.4176, 1.5177], + [ 0.6106, -0.3926], + [ 1.4743, -1.3245], + [ 0.6623, -0.5821], + [ 1.3003, -1.2372], + [-1.0337, 1.0058], + [ 1.2535, -0.9520], + [ 0.6860, -0.6068], + [ 0.6696, -0.4431], + [-0.7142, 0.6395], + [ 0.9666, -0.9102], + [ 0.0219, -0.1821], + [-0.9000, 0.9726], + [ 0.7054, -0.6568], + [-0.6726, 0.6867], + [-0.9891, 0.8847]], device='cuda:0') +06/01/2024 12:36:34 - INFO - __main__ - tensor([[[ 3.6095, 1.4381], + [ 1.4381, 3.5013]], + + [[ 5.1335, 1.8372], + [ 1.8372, 5.0026]], + + [[ 4.3734, 1.6564], + [ 1.6564, 4.2212]], + + [[ 5.8845, 2.0477], + [ 2.0477, 5.7107]], + + [[ 6.2006, 0.7892], + [ 0.7892, 5.7644]], + + [[ 6.4957, 2.6048], + [ 2.6048, 6.3431]], + + [[ 4.4940, 1.5921], + [ 1.5921, 4.3416]], + + [[ 6.8619, 2.4199], + [ 2.4199, 6.5561]], + + [[ 4.6337, 1.2595], + [ 1.2596, 4.4676]], + + [[ 5.4649, 1.2318], + [ 1.2318, 5.3646]], + + [[ 4.3012, 1.3446], + [ 1.3446, 4.1645]], + + [[ 4.9823, 0.5175], + [ 0.5175, 4.6768]], + + [[ 3.8701, 1.6757], + [ 1.6757, 3.8447]], + + [[ 5.3704, 2.9860], + [ 2.9860, 5.2949]], + + [[ 4.5441, 1.3487], + [ 1.3487, 4.3951]], + + [[ 3.9298, 1.2185], + [ 1.2185, 3.8227]], + + [[ 4.7303, 2.1710], + [ 2.1710, 4.6254]], + + [[ 3.9029, 1.7207], + [ 1.7207, 3.8629]], + + [[ 4.2162, 1.7868], + [ 1.7868, 4.1889]], + + [[ 3.7987, 1.2505], + [ 1.2505, 3.8737]], + + [[ 7.8164, 1.7798], + [ 1.7798, 7.7550]], + + [[10.9071, 1.5748], + [ 1.5748, 9.7570]], + + [[ 4.2434, 1.1526], + [ 1.1526, 4.1960]], + + [[ 5.9391, 0.7437], + [ 0.7437, 5.6865]], + + [[ 5.0330, 1.8730], + [ 1.8730, 4.9337]], + + [[ 4.4610, 1.4850], + [ 1.4850, 4.4513]], + + [[ 4.2354, 0.9441], + [ 0.9441, 4.1308]], + + [[ 3.8309, 1.0656], + [ 1.0656, 3.7515]], + + [[ 4.3015, 1.5577], + [ 1.5577, 4.1651]], + + [[ 3.9945, 1.1697], + [ 1.1697, 3.9217]], + + [[ 6.7181, 2.0805], + [ 2.0805, 6.3171]], + + [[ 6.6773, 2.2376], + [ 2.2376, 6.6757]], + + [[ 4.4722, 1.1292], + [ 1.1292, 4.6119]], + + [[ 6.2374, 2.1480], + [ 2.1480, 6.0461]], + + [[ 4.7267, 2.7495], + [ 2.7495, 4.6224]], + + [[ 3.9933, 1.5231], + [ 1.5231, 3.9156]], + + [[ 6.3354, 3.3032], + [ 3.3032, 6.2136]], + + [[ 4.6652, 0.8457], + [ 0.8457, 4.7465]], + + [[ 4.2927, 1.6460], + [ 1.6460, 4.2670]], + + [[ 3.9450, 1.2930], + [ 1.2930, 3.7240]], + + [[ 3.9756, 1.4912], + [ 1.4912, 3.8535]], + + [[ 3.8990, 1.2205], + [ 1.2205, 3.8050]], + + [[ 4.4881, 1.1261], + [ 1.1261, 4.3499]], + + [[ 3.9686, 1.7353], + [ 1.7353, 3.8985]], + + [[ 4.4406, 0.9901], + [ 0.9901, 4.3443]], + + [[ 5.4012, 0.4639], + [ 0.4639, 4.9481]], + + [[ 5.0649, 0.7564], + [ 0.7564, 4.7832]], + + [[ 4.7188, 2.2665], + [ 2.2665, 4.6183]], + + [[ 5.8700, 1.3610], + [ 1.3610, 5.5902]], + + [[ 4.0936, 1.0728], + [ 1.0728, 3.9971]], + + [[ 5.4133, 0.8582], + [ 0.8582, 5.2402]], + + [[ 8.3151, 1.9133], + [ 1.9133, 8.2371]], + + [[ 3.8662, 1.7446], + [ 1.7446, 3.7893]], + + [[ 3.5094, 1.2117], + [ 1.2117, 3.4199]], + + [[ 5.8182, 2.0898], + [ 2.0898, 5.6002]], + + [[ 4.5969, 1.8277], + [ 1.8277, 4.6003]], + + [[ 3.6258, 1.5102], + [ 1.5102, 3.5808]], + + [[ 6.3595, 2.5672], + [ 2.5672, 6.1223]], + + [[ 4.3023, 1.7416], + [ 1.7416, 4.2875]], + + [[ 5.7449, 2.0549], + [ 2.0549, 5.4758]], + + [[ 4.2852, 2.0765], + [ 2.0765, 4.2003]], + + [[ 5.9658, 2.1610], + [ 2.1610, 5.7554]], + + [[ 4.1715, 1.8707], + [ 1.8707, 4.1073]], + + [[ 6.1422, 0.3995], + [ 0.3995, 6.1192]], + + [[ 4.1918, 1.4931], + [ 1.4931, 4.1567]], + + [[ 5.4160, 1.7397], + [ 1.7397, 5.2408]], + + [[ 4.4069, 1.3408], + [ 1.3408, 4.2939]], + + [[ 4.5944, 1.2061], + [ 1.2061, 4.5725]], + + [[ 4.0280, 1.6849], + [ 1.6849, 3.8704]], + + [[ 5.8128, 0.1730], + [ 0.1730, 5.5516]], + + [[ 3.9391, 1.6987], + [ 1.6987, 3.9346]]], device='cuda:0') +06/01/2024 12:36:34 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:36:39 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:36:39 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:36:39 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:36:40 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/adapter_config.json +06/01/2024 12:36:40 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:36:40 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_adapter.bin +06/01/2024 12:36:40 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/head_config.json +06/01/2024 12:36:40 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:36:40 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_model_head.bin +06/01/2024 12:36:40 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:36:40 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:36:40 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:36:40 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:36:40 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:36:40 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:36:42 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [101, 1996, 2103, 2473, 3549, 4188, 1996, 28337, 1037, 9146, 2138, 2027, 8615, 4808, 1012, 102, 1996, 2103, 2473, 3549, 8615, 4808, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:36:42 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [101, 3021, 2979, 1996, 2208, 11097, 2000, 2198, 2138, 2010, 2735, 2001, 2279, 1012, 102, 3021, 1005, 1055, 2735, 2001, 2279, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:36:42 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [101, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 2010, 2608, 1012, 102, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 1996, 2879, 1005, 1055, 2608, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:37:23 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:37:23 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:37:23 - INFO - __main__ - tensor([[-1.8512, 1.7289], + [ 3.8551, -3.7648], + [ 2.7143, -2.6967], + [ 1.4594, -0.9887], + [ 5.6970, -5.0829], + [-3.8632, 3.5403], + [-0.3378, 0.4670], + [ 3.7715, -3.5566], + [-1.3672, 1.5202], + [-0.6896, 0.8898], + [-3.5996, 3.2301], + [ 1.5598, -1.3464], + [-3.4765, 3.3786], + [-5.4410, 5.3285], + [-1.8898, 1.8258], + [-0.6558, 0.8207], + [ 4.1542, -3.8955], + [-2.6737, 2.7225], + [-4.4501, 4.5280], + [ 0.5689, -0.3736], + [-1.6108, 1.7034], + [-3.6189, 3.2600], + [-1.4812, 1.5816], + [ 4.2296, -3.8892], + [-3.5244, 3.2079], + [ 1.4579, -1.3486], + [-0.4623, 0.3772], + [-0.1757, 0.3088], + [ 0.8249, -1.0090], + [-1.2188, 1.3210], + [ 4.1637, -3.7882], + [-3.5847, 3.6435], + [ 2.5004, -2.5497], + [ 2.9812, -2.7767], + [-4.6271, 4.4843], + [ 4.4579, -4.0770], + [-5.6072, 5.4854], + [-0.4620, 0.3452], + [-1.1861, 1.1796], + [-1.5528, 1.1996], + [-1.7304, 1.1884], + [-2.9161, 2.6640], + [-2.9248, 2.9925], + [ 3.0302, -2.7051], + [-4.5215, 4.2832], + [-2.5653, 2.6375], + [ 1.5696, -1.5769], + [-5.1281, 5.1472], + [ 3.5837, -3.4187], + [-0.7035, 0.7307], + [-1.7854, 1.4004], + [-1.4386, 1.4993], + [-2.8719, 2.7329], + [ 0.1067, -0.0216], + [ 4.9241, -4.5122], + [-4.1167, 4.0924], + [ 2.1144, -1.7376], + [ 4.4027, -3.9942], + [ 2.2056, -2.0961], + [ 3.6928, -3.3702], + [-3.5504, 3.3654], + [ 3.4710, -3.1541], + [ 2.7135, -2.5344], + [ 0.5780, -0.2846], + [-3.2328, 3.2610], + [ 3.9087, -3.5915], + [ 2.1078, -2.1899], + [-3.6384, 3.7243], + [ 2.0719, -1.9413], + [-4.7648, 4.7638], + [-3.9494, 3.9093]], device='cuda:0') +06/01/2024 12:37:23 - INFO - __main__ - tensor([[[ 7.9167, -3.2750], + [ -3.2750, 7.7778]], + + [[ 14.6346, -3.2804], + [ -3.2804, 12.9893]], + + [[ 9.1671, -2.0231], + [ -2.0231, 8.2197]], + + [[ 9.4875, -3.6227], + [ -3.6227, 9.0066]], + + [[ 11.5970, 4.1399], + [ 4.1399, 12.2080]], + + [[ 9.9438, 0.7775], + [ 0.7775, 9.7950]], + + [[ 9.5267, -5.3435], + [ -5.3435, 9.0051]], + + [[ 15.4701, -4.4201], + [ -4.4201, 14.0713]], + + [[ 11.2474, -6.2144], + [ -6.2144, 10.7485]], + + [[ 21.8519, -15.9143], + [-15.9143, 20.8067]], + + [[ 12.0009, -3.7725], + [ -3.7725, 12.0433]], + + [[ 65.6266, -56.2832], + [-56.2832, 57.1438]], + + [[ 9.3399, -1.1714], + [ -1.1714, 9.1462]], + + [[ 10.3200, 5.2066], + [ 5.2066, 10.1935]], + + [[ 7.6718, -2.5662], + [ -2.5662, 7.1035]], + + [[ 12.9435, -8.5520], + [ -8.5520, 12.4582]], + + [[ 10.1377, 0.9023], + [ 0.9023, 9.6077]], + + [[ 10.0055, -3.3412], + [ -3.3412, 9.4330]], + + [[ 11.1377, 0.6292], + [ 0.6292, 11.0833]], + + [[ 8.3388, -4.8663], + [ -4.8663, 7.9401]], + + [[ 14.0815, -7.5718], + [ -7.5718, 13.5958]], + + [[ 16.3296, -5.8575], + [ -5.8575, 14.6720]], + + [[ 13.2938, -8.8995], + [ -8.8995, 13.0516]], + + [[ 15.8389, -4.0274], + [ -4.0274, 14.0190]], + + [[ 10.2764, -1.4501], + [ -1.4501, 9.9953]], + + [[ 8.1881, -3.8766], + [ -3.8766, 8.2909]], + + [[ 16.7863, -13.5967], + [-13.5967, 16.9539]], + + [[ 11.4175, -7.9992], + [ -7.9992, 10.7402]], + + [[ 8.7515, -4.2359], + [ -4.2359, 8.1278]], + + [[ 14.3589, -10.3614], + [-10.3614, 13.8693]], + + [[ 16.7396, -4.8935], + [ -4.8934, 16.3842]], + + [[ 11.3178, -1.7772], + [ -1.7772, 11.1902]], + + [[ 13.8798, -6.6140], + [ -6.6140, 11.6809]], + + [[ 11.0240, -2.7065], + [ -2.7065, 10.4757]], + + [[ 8.2149, 3.5826], + [ 3.5826, 8.1975]], + + [[ 17.5216, -6.2727], + [ -6.2727, 16.7932]], + + [[ 10.8647, 5.4206], + [ 5.4206, 10.7295]], + + [[ 13.6354, -9.4136], + [ -9.4136, 13.5809]], + + [[ 6.3554, -1.8547], + [ -1.8547, 6.1366]], + + [[ 7.9743, -3.0572], + [ -3.0572, 7.2663]], + + [[ 6.4805, -1.5042], + [ -1.5043, 6.2282]], + + [[ 11.4756, -4.8530], + [ -4.8530, 10.8832]], + + [[ 23.5967, -15.8561], + [-15.8561, 22.1729]], + + [[ 8.1631, -1.1957], + [ -1.1957, 7.8137]], + + [[ 11.6114, -0.3630], + [ -0.3630, 11.4418]], + + [[ 12.7929, -5.9508], + [ -5.9508, 11.6225]], + + [[ 18.8578, -12.6381], + [-12.6381, 16.9771]], + + [[ 10.8891, 3.3518], + [ 3.3518, 10.6661]], + + [[ 19.1451, -9.0446], + [ -9.0446, 18.3117]], + + [[ 21.6849, -17.7293], + [-17.7292, 21.3564]], + + [[ 50.9668, -44.4848], + [-44.4848, 50.5589]], + + [[ 19.0558, -11.9943], + [-11.9943, 18.1859]], + + [[ 7.8210, -1.1745], + [ -1.1745, 7.5809]], + + [[ 8.9910, -5.6942], + [ -5.6942, 8.3869]], + + [[ 15.2915, -1.6155], + [ -1.6155, 14.5555]], + + [[ 9.6525, 0.2545], + [ 0.2545, 9.4945]], + + [[ 12.3078, -7.5699], + [ -7.5699, 11.9411]], + + [[ 15.3993, -2.8465], + [ -2.8465, 15.0854]], + + [[ 7.1033, -1.4739], + [ -1.4739, 7.0627]], + + [[ 13.3767, -3.7596], + [ -3.7596, 12.4425]], + + [[ 6.6603, 1.5305], + [ 1.5305, 6.3847]], + + [[ 13.8916, -4.1934], + [ -4.1934, 12.8211]], + + [[ 10.4732, -3.3322], + [ -3.3322, 10.0229]], + + [[ 28.7033, -23.3341], + [-23.3341, 27.9274]], + + [[ 11.6256, -3.2834], + [ -3.2834, 11.3468]], + + [[ 13.9245, -3.1250], + [ -3.1250, 12.6981]], + + [[ 65.5379, -54.3499], + [-54.3500, 56.2681]], + + [[ 11.6439, -1.8755], + [ -1.8755, 10.9600]], + + [[ 9.5598, -3.7850], + [ -3.7850, 9.1056]], + + [[ 12.5580, 0.5598], + [ 0.5598, 12.1930]], + + [[ 13.1727, -3.2173], + [ -3.2173, 12.9326]]], device='cuda:0') +06/01/2024 12:37:23 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:37:45 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:37:45 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:37:46 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:37:47 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/adapter_config.json +06/01/2024 12:37:47 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:37:47 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_adapter.bin +06/01/2024 12:37:47 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/head_config.json +06/01/2024 12:37:47 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:37:47 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_model_head.bin +06/01/2024 12:37:47 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:37:47 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:37:47 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:37:47 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:37:47 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:37:47 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:37:49 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [101, 1996, 2103, 2473, 3549, 4188, 1996, 28337, 1037, 9146, 2138, 2027, 8615, 4808, 1012, 102, 1996, 2103, 2473, 3549, 8615, 4808, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:37:49 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [101, 3021, 2979, 1996, 2208, 11097, 2000, 2198, 2138, 2010, 2735, 2001, 2279, 1012, 102, 3021, 1005, 1055, 2735, 2001, 2279, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:37:49 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [101, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 2010, 2608, 1012, 102, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 1996, 2879, 1005, 1055, 2608, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:38:32 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:38:32 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:38:32 - INFO - __main__ - tensor([[-3.4554, 3.2970], + [ 5.6479, -5.4347], + [ 3.8366, -3.7076], + [ 2.0852, -1.5690], + [ 6.9678, -6.4072], + [-5.0581, 4.7439], + [ 0.3131, -0.1054], + [ 4.7499, -4.4422], + [-3.6443, 3.7238], + [-1.6142, 1.9077], + [-5.7875, 5.4557], + [-0.5697, 0.6923], + [-5.1957, 5.0588], + [-6.6535, 6.5472], + [-2.6344, 2.5853], + [-1.3939, 1.5782], + [ 6.0700, -5.7222], + [-4.7801, 4.7511], + [-6.4498, 6.4818], + [ 0.1586, 0.0370], + [-3.3484, 3.4188], + [-4.3374, 4.0534], + [-3.8542, 3.9437], + [ 5.2326, -4.9170], + [-5.2651, 4.9572], + [ 2.7348, -2.4937], + [-1.5963, 1.5901], + [-0.2302, 0.3842], + [ 0.8300, -1.0016], + [-3.2153, 3.2989], + [ 4.7914, -4.3522], + [-5.1167, 5.1448], + [ 3.5668, -3.5046], + [ 3.9095, -3.6386], + [-6.1764, 6.0337], + [ 5.5662, -5.1495], + [-7.1502, 7.0150], + [-1.4474, 1.3339], + [-1.5092, 1.5394], + [-2.4804, 2.1201], + [-2.1285, 1.5660], + [-5.1981, 4.9246], + [-4.6479, 4.6384], + [ 4.7687, -4.3271], + [-6.4606, 6.2026], + [-4.0485, 4.1045], + [ 2.5406, -2.5697], + [-6.5549, 6.5559], + [ 5.2498, -5.0047], + [-3.8089, 3.7797], + [-5.0104, 4.6779], + [-2.4554, 2.5218], + [-4.8869, 4.6689], + [-0.1406, 0.2508], + [ 6.8289, -6.4163], + [-5.7576, 5.7038], + [ 4.4301, -3.9745], + [ 5.5227, -5.0576], + [ 2.5747, -2.4653], + [ 5.4003, -5.0046], + [-5.0329, 4.8029], + [ 4.8214, -4.4634], + [ 3.4206, -3.2268], + [-0.2327, 0.5367], + [-6.2506, 6.2464], + [ 6.3304, -5.9009], + [ 4.8283, -4.6554], + [-5.4226, 5.4542], + [ 3.9102, -3.7475], + [-6.9969, 6.9245], + [-5.9932, 5.9467]], device='cuda:0') +06/01/2024 12:38:32 - INFO - __main__ - tensor([[[ 2.2298e+01, -1.4602e+01], + [-1.4602e+01, 2.2161e+01]], + + [[ 2.3616e+01, -6.6381e+00], + [-6.6380e+00, 2.1151e+01]], + + [[ 1.7377e+01, -7.4134e+00], + [-7.4134e+00, 1.5490e+01]], + + [[ 1.9127e+01, -1.2548e+01], + [-1.2548e+01, 1.8509e+01]], + + [[ 1.3916e+01, 6.4182e+00], + [ 6.4182e+00, 1.4602e+01]], + + [[ 1.3477e+01, 1.9328e-01], + [ 1.9328e-01, 1.3320e+01]], + + [[ 4.1701e+01, -3.6496e+01], + [-3.6496e+01, 3.9766e+01]], + + [[ 3.3064e+01, -1.9513e+01], + [-1.9513e+01, 3.1177e+01]], + + [[ 2.9198e+01, -1.9051e+01], + [-1.9051e+01, 2.7489e+01]], + + [[ 6.9861e+01, -6.0715e+01], + [-6.0715e+01, 6.5231e+01]], + + [[ 1.7133e+01, -2.2018e+00], + [-2.2018e+00, 1.7615e+01]], + + [[ 2.8724e+02, -2.6456e+02], + [-2.6455e+02, 2.5251e+02]], + + [[ 1.5718e+01, -2.3357e+00], + [-2.3357e+00, 1.5268e+01]], + + [[ 1.3005e+01, 7.0249e+00], + [ 7.0249e+00, 1.2799e+01]], + + [[ 1.3470e+01, -6.9489e+00], + [-6.9489e+00, 1.2528e+01]], + + [[ 2.3169e+01, -1.7831e+01], + [-1.7831e+01, 2.2361e+01]], + + [[ 1.3106e+01, 3.9905e+00], + [ 3.9905e+00, 1.2850e+01]], + + [[ 1.7676e+01, -5.2151e+00], + [-5.2151e+00, 1.6654e+01]], + + [[ 1.4800e+01, 4.7257e+00], + [ 4.7257e+00, 1.4441e+01]], + + [[ 3.0853e+01, -2.6238e+01], + [-2.6238e+01, 2.8978e+01]], + + [[ 2.4648e+01, -1.4962e+01], + [-1.4962e+01, 2.3162e+01]], + + [[ 2.3502e+01, -1.1585e+01], + [-1.1585e+01, 2.2097e+01]], + + [[ 2.6985e+01, -1.7139e+01], + [-1.7139e+01, 2.5221e+01]], + + [[ 3.0821e+01, -1.5673e+01], + [-1.5673e+01, 2.7895e+01]], + + [[ 1.7792e+01, -3.8539e+00], + [-3.8539e+00, 1.7482e+01]], + + [[ 2.0392e+01, -1.4485e+01], + [-1.4485e+01, 2.0760e+01]], + + [[ 2.9755e+01, -2.5458e+01], + [-2.5458e+01, 2.9719e+01]], + + [[ 3.5649e+01, -3.0942e+01], + [-3.0942e+01, 3.2887e+01]], + + [[ 2.1662e+01, -1.6654e+01], + [-1.6654e+01, 2.0227e+01]], + + [[ 3.4837e+01, -2.6660e+01], + [-2.6660e+01, 3.2653e+01]], + + [[ 3.4604e+01, -2.1696e+01], + [-2.1696e+01, 3.4738e+01]], + + [[ 1.6054e+01, -2.1441e+00], + [-2.1441e+00, 1.5578e+01]], + + [[ 3.3900e+01, -2.2997e+01], + [-2.2997e+01, 2.8916e+01]], + + [[ 2.3336e+01, -1.2959e+01], + [-1.2959e+01, 2.2392e+01]], + + [[ 1.1607e+01, 5.7416e+00], + [ 5.7416e+00, 1.1550e+01]], + + [[ 2.6134e+01, -1.1408e+01], + [-1.1408e+01, 2.4864e+01]], + + [[ 1.3681e+01, 8.7815e+00], + [ 8.7815e+00, 1.3643e+01]], + + [[ 3.8787e+01, -3.3715e+01], + [-3.3715e+01, 3.8768e+01]], + + [[ 1.1105e+01, -6.1131e+00], + [-6.1131e+00, 1.0674e+01]], + + [[ 1.7896e+01, -1.1568e+01], + [-1.1568e+01, 1.7412e+01]], + + [[ 1.4012e+01, -8.0922e+00], + [-8.0922e+00, 1.3609e+01]], + + [[ 1.5491e+01, -2.4517e+00], + [-2.4517e+00, 1.5041e+01]], + + [[ 4.2684e+01, -2.9481e+01], + [-2.9481e+01, 3.9543e+01]], + + [[ 1.6584e+01, -4.9998e+00], + [-4.9998e+00, 1.5920e+01]], + + [[ 1.4540e+01, 3.6329e+00], + [ 3.6329e+00, 1.4676e+01]], + + [[ 2.4386e+01, -1.3434e+01], + [-1.3434e+01, 2.1686e+01]], + + [[ 6.2573e+01, -5.2255e+01], + [-5.2255e+01, 5.7459e+01]], + + [[ 1.4275e+01, 5.4413e+00], + [ 5.4413e+00, 1.3882e+01]], + + [[ 3.5470e+01, -2.0738e+01], + [-2.0738e+01, 3.5015e+01]], + + [[ 5.4930e+01, -4.5209e+01], + [-4.5209e+01, 5.3783e+01]], + + [[ 7.2941e+01, -6.1139e+01], + [-6.1139e+01, 7.6744e+01]], + + [[ 5.2416e+01, -4.2869e+01], + [-4.2869e+01, 4.8950e+01]], + + [[ 1.5786e+01, -3.7345e+00], + [-3.7345e+00, 1.5555e+01]], + + [[ 1.2841e+01, -9.6265e+00], + [-9.6265e+00, 1.2184e+01]], + + [[ 1.6899e+01, 3.3909e+00], + [ 3.3909e+00, 1.6820e+01]], + + [[ 1.3763e+01, 1.7429e+00], + [ 1.7429e+00, 1.3443e+01]], + + [[ 2.6440e+01, -1.6618e+01], + [-1.6618e+01, 2.6441e+01]], + + [[ 2.4932e+01, -9.7820e+00], + [-9.7820e+00, 2.5540e+01]], + + [[ 1.3907e+01, -7.5407e+00], + [-7.5407e+00, 1.3686e+01]], + + [[ 1.9798e+01, -5.1422e+00], + [-5.1422e+00, 1.8619e+01]], + + [[ 1.1558e+01, 9.2936e-01], + [ 9.2936e-01, 1.1058e+01]], + + [[ 2.5620e+01, -1.2248e+01], + [-1.2248e+01, 2.4006e+01]], + + [[ 2.2278e+01, -1.3490e+01], + [-1.3490e+01, 2.1059e+01]], + + [[ 1.0410e+02, -9.4711e+01], + [-9.4711e+01, 9.6230e+01]], + + [[ 1.3822e+01, 4.6049e+00], + [ 4.6049e+00, 1.3521e+01]], + + [[ 1.6338e+01, 1.7513e+00], + [ 1.7513e+00, 1.6270e+01]], + + [[ 1.0495e+02, -8.4944e+01], + [-8.4944e+01, 9.1635e+01]], + + [[ 1.6715e+01, -9.7552e-01], + [-9.7552e-01, 1.5381e+01]], + + [[ 2.4676e+01, -1.4000e+01], + [-1.4000e+01, 2.2863e+01]], + + [[ 1.3834e+01, 8.0886e+00], + [ 8.0886e+00, 1.3560e+01]], + + [[ 1.7157e+01, -4.8411e-02], + [-4.8411e-02, 1.6529e+01]]], device='cuda:0') +06/01/2024 12:38:33 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:38:37 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:38:37 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:38:38 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:38:38 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/adapter_config.json +06/01/2024 12:38:38 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:38:38 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_adapter.bin +06/01/2024 12:38:38 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/head_config.json +06/01/2024 12:38:38 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:38:38 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_model_head.bin +06/01/2024 12:38:38 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:38:38 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:38:38 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:38:38 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:38:38 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:38:38 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:38:40 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [101, 1996, 2103, 2473, 3549, 4188, 1996, 28337, 1037, 9146, 2138, 2027, 8615, 4808, 1012, 102, 1996, 2103, 2473, 3549, 8615, 4808, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:38:40 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [101, 3021, 2979, 1996, 2208, 11097, 2000, 2198, 2138, 2010, 2735, 2001, 2279, 1012, 102, 3021, 1005, 1055, 2735, 2001, 2279, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:38:40 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [101, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 2010, 2608, 1012, 102, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 1996, 2879, 1005, 1055, 2608, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:39:23 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:39:23 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:39:23 - INFO - __main__ - tensor([[-4.8366, 4.5811], + [ 6.8664, -6.6237], + [ 4.9653, -4.7851], + [ 2.5795, -2.0887], + [ 7.9230, -7.4063], + [-5.5139, 5.1801], + [ 3.1090, -2.8766], + [ 5.9026, -5.5809], + [-4.9232, 4.9205], + [-1.8034, 2.1159], + [-6.4373, 6.0552], + [ 4.0973, -3.7753], + [-5.8541, 5.6489], + [-7.1762, 7.0312], + [-3.2395, 3.1543], + [-0.6772, 0.8413], + [ 7.0164, -6.7019], + [-6.0330, 5.9292], + [-7.3912, 7.3867], + [ 1.0715, -0.8521], + [-4.1051, 4.1163], + [-4.6672, 4.3893], + [-4.1112, 4.1827], + [ 6.1584, -5.8585], + [-5.1555, 4.8273], + [ 4.4110, -4.1360], + [-2.5164, 2.4650], + [-0.5280, 0.6587], + [ 1.5765, -1.7981], + [-2.9785, 3.0610], + [ 6.2089, -5.8072], + [-6.0605, 6.0413], + [ 4.4173, -4.3433], + [ 5.2723, -4.9825], + [-6.9592, 6.7959], + [ 6.7129, -6.3105], + [-7.9291, 7.7543], + [-1.3949, 1.2021], + [-1.3345, 1.3358], + [-2.6356, 2.1981], + [-2.3377, 1.7233], + [-5.3874, 5.0947], + [-5.1324, 5.1123], + [ 5.6607, -5.2217], + [-6.5323, 6.2279], + [-4.1850, 4.2162], + [ 3.8385, -3.8699], + [-7.4069, 7.3446], + [ 6.5871, -6.3771], + [-4.9349, 4.8635], + [-5.3064, 4.9428], + [-2.8070, 2.7820], + [-5.8077, 5.5581], + [ 0.1950, -0.1195], + [ 8.0907, -7.7283], + [-6.5089, 6.4351], + [ 5.5201, -5.1066], + [ 6.9832, -6.5681], + [ 3.4244, -3.3215], + [ 6.6369, -6.2352], + [-6.0530, 5.7818], + [ 6.5383, -6.1840], + [ 4.2762, -4.0928], + [ 1.5925, -1.2883], + [-7.0228, 6.9423], + [ 7.4653, -7.0589], + [ 5.7976, -5.5707], + [-6.7104, 6.6306], + [ 5.0948, -4.9072], + [-7.5399, 7.4274], + [-6.4734, 6.4096]], device='cuda:0') +06/01/2024 12:39:23 - INFO - __main__ - tensor([[[ 44.6187, -31.8543], + [ -31.8542, 45.0875]], + + [[ 26.3573, -2.0528], + [ -2.0528, 24.4106]], + + [[ 27.2751, -12.0014], + [ -12.0014, 24.4708]], + + [[ 32.2183, -23.7153], + [ -23.7153, 31.5493]], + + [[ 17.0337, 11.1793], + [ 11.1793, 17.7868]], + + [[ 18.5760, -1.4769], + [ -1.4769, 18.2911]], + + [[ 182.0382, -168.8640], + [-168.8642, 172.3680]], + + [[ 68.8498, -47.6056], + [ -47.6055, 63.2377]], + + [[ 37.2088, -21.3809], + [ -21.3809, 34.7452]], + + [[ 148.0437, -134.7868], + [-134.7867, 138.6583]], + + [[ 23.7817, -4.1808], + [ -4.1808, 24.3281]], + + [[ 252.4222, -232.1666], + [-232.1666, 234.3349]], + + [[ 21.4498, -3.7093], + [ -3.7093, 20.9116]], + + [[ 16.8116, 8.2043], + [ 8.2043, 16.5387]], + + [[ 22.4543, -13.2566], + [ -13.2566, 20.8437]], + + [[ 36.7847, -31.1874], + [ -31.1874, 35.5645]], + + [[ 16.6270, 7.1680], + [ 7.1680, 16.4391]], + + [[ 24.0285, -4.7155], + [ -4.7155, 22.7545]], + + [[ 17.5434, 9.4934], + [ 9.4934, 17.0231]], + + [[ 76.5150, -68.6882], + [ -68.6882, 71.2327]], + + [[ 42.5087, -28.9196], + [ -28.9196, 39.9470]], + + [[ 32.5764, -17.7183], + [ -17.7182, 30.5654]], + + [[ 44.2785, -32.1914], + [ -32.1914, 41.7890]], + + [[ 39.8906, -19.1941], + [ -19.1941, 36.8245]], + + [[ 27.5992, -12.0972], + [ -12.0972, 27.0776]], + + [[ 47.2082, -35.6929], + [ -35.6929, 46.9132]], + + [[ 34.8547, -28.3994], + [ -28.3994, 34.8022]], + + [[ 78.4135, -71.1907], + [ -71.1907, 72.2941]], + + [[ 46.4657, -38.6572], + [ -38.6572, 42.7972]], + + [[ 61.3171, -52.1968], + [ -52.1968, 57.8559]], + + [[ 43.0955, -23.3258], + [ -23.3258, 43.7586]], + + [[ 22.2265, -2.4108], + [ -2.4108, 21.4492]], + + [[ 74.3471, -56.5384], + [ -56.5384, 63.1226]], + + [[ 54.1092, -36.8724], + [ -36.8724, 51.1545]], + + [[ 15.5325, 7.9071], + [ 7.9071, 15.4603]], + + [[ 29.9895, -8.4386], + [ -8.4387, 29.3153]], + + [[ 16.8804, 12.7675], + [ 12.7675, 16.8675]], + + [[ 74.8385, -68.5930], + [ -68.5931, 74.5272]], + + [[ 17.5489, -11.7112], + [ -11.7111, 16.8813]], + + [[ 29.1310, -21.5317], + [ -21.5317, 28.7532]], + + [[ 22.3304, -15.0397], + [ -15.0397, 21.8068]], + + [[ 25.5761, -10.1978], + [ -10.1977, 24.8544]], + + [[ 86.1296, -67.5604], + [ -67.5604, 79.0996]], + + [[ 23.4593, -6.9219], + [ -6.9219, 22.8097]], + + [[ 23.0738, -2.8471], + [ -2.8471, 23.3753]], + + [[ 43.8119, -30.0597], + [ -30.0597, 38.7030]], + + [[ 96.9374, -80.5687], + [ -80.5687, 87.5551]], + + [[ 17.6065, 8.9173], + [ 8.9173, 17.3032]], + + [[ 40.4018, -17.5732], + [ -17.5732, 38.9120]], + + [[ 85.3421, -70.0574], + [ -70.0574, 83.0814]], + + [[ 136.6728, -123.2728], + [-123.2728, 142.6152]], + + [[ 118.3693, -105.0112], + [-105.0112, 111.2876]], + + [[ 23.1442, -6.0054], + [ -6.0054, 23.1703]], + + [[ 17.5325, -13.9052], + [ -13.9052, 16.7886]], + + [[ 17.4461, 12.6249], + [ 12.6249, 17.4893]], + + [[ 19.0300, 2.1435], + [ 2.1435, 18.4318]], + + [[ 32.0773, -16.8880], + [ -16.8879, 32.2787]], + + [[ 25.1246, -1.5764], + [ -1.5764, 25.7793]], + + [[ 22.9454, -13.4205], + [ -13.4205, 22.3546]], + + [[ 25.0448, -3.3111], + [ -3.3111, 24.0839]], + + [[ 16.8621, 1.4250], + [ 1.4250, 16.3607]], + + [[ 29.8762, -8.0311], + [ -8.0311, 28.9018]], + + [[ 31.4476, -19.1113], + [ -19.1113, 29.8440]], + + [[ 250.9833, -237.4669], + [-237.4670, 239.0613]], + + [[ 18.7306, 5.6304], + [ 5.6304, 18.4017]], + + [[ 17.9482, 8.0337], + [ 8.0337, 18.0537]], + + [[ 129.0117, -102.2211], + [-102.2212, 113.0269]], + + [[ 20.1296, 3.4822], + [ 3.4822, 18.7736]], + + [[ 30.4815, -14.4840], + [ -14.4840, 28.0366]], + + [[ 17.5037, 10.0008], + [ 10.0008, 17.0766]], + + [[ 25.3845, -3.7117], + [ -3.7117, 24.1254]]], device='cuda:0') +06/01/2024 12:39:23 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:39:28 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:39:28 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:39:29 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:39:29 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/adapter_config.json +06/01/2024 12:39:29 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:39:29 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_adapter.bin +06/01/2024 12:39:29 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/head_config.json +06/01/2024 12:39:29 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:39:30 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_model_head.bin +06/01/2024 12:39:30 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:39:30 - INFO - __main__ - bert.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:39:30 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:39:30 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:39:30 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:39:30 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:39:31 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [101, 1996, 2103, 2473, 3549, 4188, 1996, 28337, 1037, 9146, 2138, 2027, 8615, 4808, 1012, 102, 1996, 2103, 2473, 3549, 8615, 4808, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:39:31 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [101, 3021, 2979, 1996, 2208, 11097, 2000, 2198, 2138, 2010, 2735, 2001, 2279, 1012, 102, 3021, 1005, 1055, 2735, 2001, 2279, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:39:31 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [101, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 2010, 2608, 1012, 102, 1996, 2269, 3344, 1996, 5777, 2879, 1999, 1996, 2879, 1005, 1055, 2608, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:40:14 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:40:14 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:40:14 - INFO - __main__ - tensor([[-4.8588, 4.6117], + [ 7.3270, -7.0681], + [ 5.3520, -5.1671], + [ 2.6708, -2.1801], + [ 8.1495, -7.6516], + [-5.7657, 5.4211], + [ 3.5560, -3.3248], + [ 6.5351, -6.1972], + [-5.1211, 5.0975], + [-1.9563, 2.2555], + [-6.9372, 6.5603], + [ 5.1713, -4.8271], + [-6.1756, 5.9661], + [-7.3695, 7.2106], + [-3.0024, 2.9055], + [-0.4271, 0.5758], + [ 7.3406, -7.0201], + [-6.4248, 6.3113], + [-7.5616, 7.5413], + [ 1.2039, -0.9942], + [-4.2347, 4.2231], + [-5.2158, 4.8976], + [-4.5469, 4.5685], + [ 6.5989, -6.2892], + [-5.6412, 5.2930], + [ 4.7570, -4.5034], + [-2.4355, 2.3517], + [-0.5931, 0.7204], + [ 1.1556, -1.4255], + [-3.3162, 3.3504], + [ 6.5356, -6.1288], + [-6.3932, 6.3517], + [ 5.1181, -5.0173], + [ 5.7497, -5.4581], + [-7.2415, 7.0704], + [ 7.1407, -6.7344], + [-8.1179, 7.9292], + [-1.8172, 1.5960], + [-1.4976, 1.4823], + [-2.6175, 2.1951], + [-2.2725, 1.6585], + [-5.6101, 5.3015], + [-5.6258, 5.5699], + [ 6.1067, -5.6620], + [-6.9248, 6.6174], + [-4.5225, 4.5395], + [ 2.9663, -3.0481], + [-7.7863, 7.6976], + [ 7.2179, -6.9959], + [-5.1455, 5.0481], + [-6.7009, 6.3720], + [-3.1665, 3.1075], + [-6.3586, 6.1036], + [ 0.2133, -0.1557], + [ 8.3221, -7.9717], + [-6.7258, 6.6272], + [ 5.8473, -5.4340], + [ 7.2588, -6.8419], + [ 3.7760, -3.6735], + [ 6.9769, -6.5606], + [-6.2848, 5.9893], + [ 6.8439, -6.4884], + [ 4.4060, -4.2322], + [ 0.0452, 0.2021], + [-7.5887, 7.4941], + [ 7.7849, -7.3774], + [ 6.6667, -6.4034], + [-7.0038, 6.8954], + [ 5.1410, -4.9651], + [-7.7751, 7.6454], + [-6.8436, 6.7396]], device='cuda:0') +06/01/2024 12:40:14 - INFO - __main__ - tensor([[[ 60.1904, -46.7515], + [ -46.7514, 60.0883]], + + [[ 25.0260, 2.0818], + [ 2.0818, 23.5480]], + + [[ 27.6601, -10.5159], + [ -10.5158, 24.9712]], + + [[ 38.8249, -29.7078], + [ -29.7078, 38.0320]], + + [[ 17.7728, 12.5202], + [ 12.5202, 18.4069]], + + [[ 19.6214, -1.2077], + [ -1.2077, 19.3805]], + + [[ 218.3067, -202.2584], + [-202.2584, 205.6843]], + + [[ 73.6490, -48.6798], + [ -48.6799, 67.3692]], + + [[ 43.3149, -26.2587], + [ -26.2587, 40.5747]], + + [[ 199.4373, -183.6123], + [-183.6124, 186.6880]], + + [[ 23.1823, -0.5095], + [ -0.5095, 23.8083]], + + [[ 171.0681, -151.8452], + [-151.8451, 162.5312]], + + [[ 23.5242, -3.8968], + [ -3.8968, 22.9535]], + + [[ 17.7210, 8.8426], + [ 8.8426, 17.4816]], + + [[ 27.5062, -18.3991], + [ -18.3992, 25.5969]], + + [[ 38.2763, -32.4590], + [ -32.4590, 36.9796]], + + [[ 17.6581, 8.3505], + [ 8.3505, 17.4874]], + + [[ 26.2354, -4.4395], + [ -4.4395, 24.8288]], + + [[ 18.9990, 9.5113], + [ 9.5113, 18.4382]], + + [[ 99.1154, -90.0724], + [ -90.0724, 92.1456]], + + [[ 50.9613, -36.2555], + [ -36.2555, 47.9015]], + + [[ 34.6061, -17.6655], + [ -17.6655, 32.8433]], + + [[ 49.8898, -35.8735], + [ -35.8735, 47.0109]], + + [[ 33.7764, -10.9583], + [ -10.9583, 31.6103]], + + [[ 26.5808, -8.9474], + [ -8.9474, 26.0458]], + + [[ 54.3446, -41.0464], + [ -41.0464, 53.7091]], + + [[ 40.3644, -33.7919], + [ -33.7919, 40.2110]], + + [[ 95.0507, -86.8912], + [ -86.8913, 87.5269]], + + [[ 57.3307, -49.2159], + [ -49.2158, 52.9049]], + + [[ 72.5550, -62.1102], + [ -62.1102, 68.5283]], + + [[ 43.6298, -22.0094], + [ -22.0094, 44.6098]], + + [[ 22.9512, -1.0558], + [ -1.0558, 22.1737]], + + [[ 60.2311, -40.8324], + [ -40.8324, 51.6456]], + + [[ 62.2954, -42.3514], + [ -42.3514, 58.6817]], + + [[ 16.5631, 8.9800], + [ 8.9800, 16.4664]], + + [[ 27.3089, -3.1867], + [ -3.1867, 27.1644]], + + [[ 17.7478, 13.6145], + [ 13.6145, 17.7567]], + + [[ 88.9077, -81.9654], + [ -81.9654, 88.7968]], + + [[ 21.4844, -15.0267], + [ -15.0267, 20.5880]], + + [[ 38.4157, -30.1238], + [ -30.1238, 37.2866]], + + [[ 27.7753, -20.1133], + [ -20.1133, 27.0685]], + + [[ 28.3119, -11.7046], + [ -11.7046, 27.5579]], + + [[ 79.2220, -59.0010], + [ -59.0010, 73.6120]], + + [[ 25.4126, -6.5088], + [ -6.5088, 24.7202]], + + [[ 23.5089, -0.7071], + [ -0.7071, 23.7978]], + + [[ 51.4005, -35.6621], + [ -35.6621, 45.4349]], + + [[ 161.4131, -144.7284], + [-144.7285, 148.3125]], + + [[ 18.1295, 11.2501], + [ 11.2501, 17.8628]], + + [[ 31.0841, -4.5830], + [ -4.5830, 29.9508]], + + [[ 112.4135, -95.6982], + [ -95.6982, 109.6960]], + + [[ 68.9952, -47.6430], + [ -47.6430, 72.0617]], + + [[ 133.5000, -118.8136], + [-118.8136, 126.2280]], + + [[ 23.6701, -3.5035], + [ -3.5035, 23.8273]], + + [[ 21.6421, -17.6401], + [ -17.6401, 20.5255]], + + [[ 18.1831, 14.0226], + [ 14.0226, 18.2161]], + + [[ 20.7005, 1.9523], + [ 1.9523, 20.1675]], + + [[ 38.8416, -21.9473], + [ -21.9473, 39.2187]], + + [[ 25.8455, -0.3747], + [ -0.3747, 26.5745]], + + [[ 28.5131, -17.5328], + [ -17.5328, 27.6491]], + + [[ 25.5405, -1.5978], + [ -1.5978, 24.6058]], + + [[ 18.7066, 1.0183], + [ 1.0183, 18.2070]], + + [[ 30.2208, -6.4648], + [ -6.4648, 29.4348]], + + [[ 37.1928, -23.8984], + [ -23.8984, 35.3659]], + + [[ 310.3965, -292.3167], + [-292.3165, 289.5253]], + + [[ 18.2796, 10.0007], + [ 10.0007, 17.9455]], + + [[ 18.0353, 10.3908], + [ 10.3908, 18.1956]], + + [[ 71.2161, -44.4064], + [ -44.4064, 63.5942]], + + [[ 21.5526, 4.1095], + [ 4.1095, 20.1128]], + + [[ 39.9157, -22.6142], + [ -22.6142, 36.2971]], + + [[ 18.5234, 10.9066], + [ 10.9066, 18.0909]], + + [[ 26.0981, -2.1714], + [ -2.1714, 24.9529]]], device='cuda:0') +06/01/2024 12:40:14 - INFO - __main__ - ***** Completed training ***** diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..b0c10644be30d0bf4603599bc6a7c42f5f1c54bb --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c630998e9e716cd6843d4e0b6fbb27eb8bbe6783d3df7d29292ef7e33a73221c +size 37 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..162b44897857cb39f44a235de398685e886a2528 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e39dedcc17e7eef0765bc19a1fa8d725118c00d04578e84b70fdcc48cae926b +size 11865 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..c5c480effe268d3a36d4733f6e866ef429ea09bc --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9976245c9934636cd4236439a9fbac4a25a61adbe04630fb3b5cf9c439dbec82 +size 6087 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..472891d69ad1332469ec9b6cfbdacbf204c41b17 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bebfa983638752d3ce93b99f4b10291972a0753fb25be91337afc9ce2505d4 +size 38 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2ede710d4f79bb8bb91a5fc9debe28238e17b2 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1938ad1144d0263299e36f2762bc747251903ba499e5959c946dd039d0947de9 +size 11888 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..dd2e46737d2ef6b027b4d0f9cc80f86c655ef2dc --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde8de8430eaba84fa6fe6e03aacf1feec1264f893966ea1de8c9f45fbff73ce +size 6132 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..c5a693b80334736da391198918b8c83529fb0258 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cec47444f6780cd07d7efbc414f88f83e0197f052811f17823eb6dada84d2d +size 38 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..74e1e7cc9d05fb1ecb1f467ffbc338022a1666f9 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916af724689bb58a7260af4f15f89521020b99cbcf5c262e820adbd30e5f30b6 +size 11928 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..fc508399d3cff18aa54dfebed86321b4aac46525 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9f466ea5c33679d1c34759d72701177c27b951a47a8f3206fef0fb3ee96751 +size 6142 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..c5a693b80334736da391198918b8c83529fb0258 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cec47444f6780cd07d7efbc414f88f83e0197f052811f17823eb6dada84d2d +size 38 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..35319b0e29b72a329719a8d906ded2360759ac29 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9b8b1b9141a2851fd15857ea20bcf4334c058f02015747cd5b5762425cb9bc +size 11965 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..153c786249a05ad5d3ce3ecea0c70f200a3f2d99 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc1b1548ef50df3be90500dd3f3719fdd9e1035060594d32607ae32f9de5ccc7 +size 6151 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..a7306d61178e98cf409c80257b4be8f6ecb0a8a1 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:282af2675a6b17fe8dab9709190925905e7286d2d529652fee8e1bf5007a9980 +size 38 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0ec87253faaba4b938795ede738a5b61bdcefb --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf27a781fe2f45d757f0c88bbd6c3a27c3e1bf6bdc973650c8f2d4a2aa0b1e0 +size 11962 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..cec29610d36b3d7c09eff04571c4fa21119ba74c --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f670747f9076221449ccf955f29c24400e559f30e41f3e8d678083c87abeed8b +size 6141 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..c5a693b80334736da391198918b8c83529fb0258 --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cec47444f6780cd07d7efbc414f88f83e0197f052811f17823eb6dada84d2d +size 38 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..39fced89120ae26d11def621e67a1971e653607b --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1185ed06deb477571ad6912a291d32ab8ba422f19eba4001168acdc3e2f70f9b +size 11969 diff --git a/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..a6570f2fc11bb22d0921e1b66198832359453c8e --- /dev/null +++ b/outputs/wnli/bert-base-uncased_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39d1c063536e7c30e60ac1970032bec5c27c23c4f2ff531765842d5b28d57af +size 6154 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log new file mode 100644 index 0000000000000000000000000000000000000000..6b8e30e8886a9c8a0550043805df5757ceaaac33 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/logfile_la_{args.laplace_sub}.log @@ -0,0 +1,2394 @@ +06/01/2024 12:41:23 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:41:23 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:41:36 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:41:37 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/adapter_config.json +06/01/2024 12:41:37 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:41:37 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_adapter.bin +06/01/2024 12:41:37 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/head_config.json +06/01/2024 12:41:37 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:41:37 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/pytorch_model_head.bin +06/01/2024 12:41:37 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:41:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:41:37 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:41:37 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:41:37 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:41:37 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:41:41 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [0, 133, 343, 1676, 2262, 3179, 5, 14553, 10, 6687, 142, 51, 9741, 1476, 4, 2, 2, 133, 343, 1676, 2262, 9741, 1476, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:41:41 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [0, 19993, 1595, 5, 177, 9902, 7, 610, 142, 39, 1004, 21, 220, 4, 2, 2, 19993, 18, 1004, 21, 220, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:41:41 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [0, 133, 1150, 2584, 5, 8416, 2143, 11, 39, 3701, 4, 2, 2, 133, 1150, 2584, 5, 8416, 2143, 11, 5, 2143, 18, 3701, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:42:23 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:42:23 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:42:23 - INFO - __main__ - tensor([[-0.0938, 0.1189], + [-0.1582, -0.0418], + [-0.0808, 0.1495], + [-0.0854, 0.1448], + [-0.0874, 0.0958], + [-0.1845, -0.0530], + [-0.1691, -0.0464], + [-0.0770, 0.0839], + [-0.0961, 0.1252], + [-0.0841, 0.1444], + [-0.0880, 0.0926], + [-0.0990, 0.1035], + [-0.1002, 0.0704], + [-0.0794, 0.1154], + [-0.1062, -0.0848], + [-0.0922, 0.0968], + [-0.1053, 0.1138], + [-0.0652, 0.0989], + [-0.1054, 0.0652], + [-0.1182, 0.1004], + [-0.0850, 0.1393], + [-0.1168, 0.0453], + [-0.1411, 0.0622], + [-0.0770, 0.0985], + [-0.0862, -0.0601], + [-0.1290, 0.0680], + [-0.1289, 0.0954], + [-0.1136, 0.0936], + [-0.1012, 0.1062], + [-0.0833, 0.1398], + [-0.0950, 0.1130], + [-0.0757, 0.1484], + [-0.1706, -0.0531], + [-0.0856, 0.0990], + [-0.0854, 0.0922], + [-0.0993, 0.0991], + [-0.0917, 0.1151], + [-0.1005, 0.1018], + [-0.0870, 0.1461], + [-0.0940, 0.1148], + [-0.0924, 0.1171], + [-0.1454, 0.0657], + [-0.0956, 0.0924], + [-0.0898, 0.1020], + [-0.1102, 0.1026], + [-0.0768, 0.1394], + [-0.0835, 0.0935], + [-0.0846, 0.0807], + [-0.0891, 0.0952], + [-0.0902, 0.0975], + [-0.0853, 0.0960], + [-0.0836, 0.1423], + [-0.1339, 0.0663], + [-0.1237, -0.0463], + [-0.1123, 0.0969], + [-0.1070, -0.0132], + [-0.0924, 0.0935], + [-0.0924, 0.1077], + [-0.0654, 0.0384], + [-0.1118, 0.1043], + [-0.0884, 0.0750], + [-0.1138, 0.0904], + [-0.0905, 0.0875], + [-0.0922, 0.1576], + [-0.1091, 0.0868], + [-0.0913, 0.0913], + [-0.1114, 0.1052], + [-0.0811, 0.0952], + [-0.0872, 0.0807], + [-0.0903, 0.1489], + [-0.1257, 0.0751]], device='cuda:0') +06/01/2024 12:42:23 - INFO - __main__ - tensor([[[21.6699, 21.5334], + [21.5333, 21.7148]], + + [[26.6715, 25.7631], + [25.7630, 26.6380]], + + [[24.5601, 24.3658], + [24.3658, 24.5726]], + + [[23.7898, 23.5931], + [23.5931, 23.8141]], + + [[21.8565, 21.6399], + [21.6399, 21.8447]], + + [[26.3967, 25.2160], + [25.2160, 26.3905]], + + [[23.0043, 22.1771], + [22.1771, 23.1480]], + + [[22.9756, 22.3382], + [22.3383, 23.0542]], + + [[21.8576, 21.6049], + [21.6049, 21.8567]], + + [[22.9705, 22.7798], + [22.7798, 22.9701]], + + [[23.5460, 23.2075], + [23.2075, 23.5679]], + + [[23.4387, 23.1909], + [23.1909, 23.4291]], + + [[22.3092, 22.0790], + [22.0790, 22.3116]], + + [[23.4030, 23.1632], + [23.1632, 23.4115]], + + [[23.7942, 22.9885], + [22.9885, 25.5141]], + + [[23.7266, 23.4064], + [23.4064, 23.7457]], + + [[23.2256, 22.8950], + [22.8950, 23.2417]], + + [[23.6529, 23.2656], + [23.2656, 23.6620]], + + [[23.1093, 22.8708], + [22.8708, 23.1226]], + + [[23.5586, 23.1730], + [23.1730, 23.6086]], + + [[24.2196, 23.9445], + [23.9445, 24.2089]], + + [[21.7176, 21.0624], + [21.0624, 22.0341]], + + [[23.2238, 22.6802], + [22.6802, 23.1566]], + + [[22.2286, 22.0016], + [22.0016, 22.2199]], + + [[25.8599, 24.7729], + [24.7729, 25.7265]], + + [[23.3565, 23.0124], + [23.0124, 23.4048]], + + [[22.5842, 22.2187], + [22.2188, 22.5772]], + + [[23.0204, 22.6937], + [22.6937, 23.0142]], + + [[25.3877, 24.8341], + [24.8341, 25.4241]], + + [[23.7296, 23.5635], + [23.5635, 23.7275]], + + [[23.5643, 23.2437], + [23.2437, 23.5347]], + + [[22.4745, 22.2883], + [22.2883, 22.4787]], + + [[27.3473, 24.9759], + [24.9759, 27.3560]], + + [[23.0264, 22.4729], + [22.4729, 23.0701]], + + [[22.2283, 21.8764], + [21.8764, 22.2341]], + + [[22.9012, 22.6513], + [22.6513, 22.9064]], + + [[23.6090, 23.3635], + [23.3635, 23.6012]], + + [[25.0045, 24.4860], + [24.4860, 25.0331]], + + [[23.1648, 22.9653], + [22.9653, 23.1627]], + + [[21.3723, 21.2545], + [21.2545, 21.5064]], + + [[21.7102, 21.5730], + [21.5730, 21.7509]], + + [[22.7845, 22.3228], + [22.3228, 22.8001]], + + [[23.6437, 23.4359], + [23.4359, 23.6504]], + + [[22.6134, 22.2984], + [22.2984, 22.6278]], + + [[23.3693, 23.0309], + [23.0310, 23.3300]], + + [[22.9383, 22.7591], + [22.7591, 22.9513]], + + [[22.5741, 22.3647], + [22.3647, 22.5819]], + + [[23.2365, 23.0049], + [23.0049, 23.2739]], + + [[23.5976, 23.3569], + [23.3569, 23.5925]], + + [[24.6231, 24.2309], + [24.2309, 24.6306]], + + [[22.4761, 22.2842], + [22.2842, 22.4836]], + + [[24.4802, 24.2022], + [24.2022, 24.4586]], + + [[20.4334, 20.1190], + [20.1190, 20.4580]], + + [[23.0503, 21.6660], + [21.6660, 22.7100]], + + [[22.5557, 22.2485], + [22.2484, 22.5410]], + + [[22.2894, 21.7176], + [21.7176, 22.6090]], + + [[23.1156, 22.7724], + [22.7723, 23.1249]], + + [[23.2655, 22.9090], + [22.9090, 23.2678]], + + [[20.5725, 19.8586], + [19.8586, 20.6333]], + + [[22.4033, 22.1442], + [22.1442, 22.3971]], + + [[24.0678, 23.6762], + [23.6762, 24.1000]], + + [[22.1562, 21.8215], + [21.8215, 22.1403]], + + [[23.2561, 22.9651], + [22.9651, 23.2699]], + + [[23.9115, 23.6937], + [23.6937, 23.9096]], + + [[23.4677, 23.1581], + [23.1581, 23.4792]], + + [[23.3556, 23.1498], + [23.1498, 23.3751]], + + [[22.9643, 22.7002], + [22.7002, 22.9272]], + + [[25.5432, 24.9712], + [24.9712, 25.5550]], + + [[23.1594, 22.8854], + [22.8854, 23.1766]], + + [[23.9437, 23.7771], + [23.7771, 23.9622]], + + [[22.9760, 22.5377], + [22.5377, 22.9640]]], device='cuda:0') +06/01/2024 12:42:23 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:42:28 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:42:28 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:42:29 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:42:30 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/adapter_config.json +06/01/2024 12:42:30 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:42:30 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_adapter.bin +06/01/2024 12:42:31 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/head_config.json +06/01/2024 12:42:31 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:42:31 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/pytorch_model_head.bin +06/01/2024 12:42:31 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:42:31 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:42:31 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:42:31 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:42:31 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:42:31 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:42:34 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [0, 133, 343, 1676, 2262, 3179, 5, 14553, 10, 6687, 142, 51, 9741, 1476, 4, 2, 2, 133, 343, 1676, 2262, 9741, 1476, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:42:34 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [0, 19993, 1595, 5, 177, 9902, 7, 610, 142, 39, 1004, 21, 220, 4, 2, 2, 19993, 18, 1004, 21, 220, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:42:34 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [0, 133, 1150, 2584, 5, 8416, 2143, 11, 39, 3701, 4, 2, 2, 133, 1150, 2584, 5, 8416, 2143, 11, 5, 2143, 18, 3701, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:43:15 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:43:15 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:43:15 - INFO - __main__ - tensor([[-4.3223e-01, 3.5479e-01], + [-3.5601e-02, -1.4892e-01], + [ 4.1116e-01, -5.0305e-01], + [ 1.8463e-01, -1.6883e-01], + [ 3.0279e-01, -2.7911e-01], + [-5.5205e-01, 3.0692e-01], + [-3.2298e-01, 1.0672e-01], + [ 4.3357e-01, -5.3033e-01], + [-5.8233e-01, 4.0431e-01], + [ 2.2766e-01, -2.2129e-01], + [-6.3045e-01, 4.9412e-01], + [-1.3751e-01, -9.3528e-04], + [-7.5120e-01, 4.8296e-01], + [-1.7249e+00, 1.6221e+00], + [-8.0658e-01, 5.3091e-01], + [-5.0117e-01, 2.8408e-01], + [ 2.6806e+00, -2.6614e+00], + [-3.7891e-01, 3.5889e-01], + [-1.6999e-01, 5.8887e-02], + [ 9.6097e-02, -2.0370e-01], + [ 3.6435e-02, -1.8741e-02], + [-1.3211e+00, 1.1893e+00], + [-2.2805e-01, 1.6008e-01], + [-2.0728e-01, 1.2867e-02], + [-5.9582e-01, 5.0555e-01], + [ 2.7278e-01, -4.2044e-01], + [ 1.3812e-01, -2.3251e-01], + [-3.5067e-01, 2.0419e-01], + [-2.1851e-01, 1.8414e-01], + [-2.1137e-01, 1.2645e-01], + [ 1.0213e+00, -1.0617e+00], + [-1.6379e-01, 1.9955e-01], + [ 2.6567e-01, -3.6060e-01], + [ 1.1774e+00, -1.3346e+00], + [-1.3172e+00, 1.2234e+00], + [ 5.6633e-01, -6.1932e-01], + [-1.6921e+00, 1.5718e+00], + [ 1.5483e-01, -1.5236e-01], + [ 9.7061e-02, -9.2333e-02], + [-4.7105e-01, 4.2723e-01], + [-3.6506e-01, 3.0864e-01], + [-6.7141e-01, 6.7399e-01], + [-4.1091e-01, 2.6102e-01], + [ 7.6857e-01, -8.0409e-01], + [-8.0844e-01, 6.6226e-01], + [ 1.4694e-01, -1.3043e-01], + [ 9.2010e-02, -1.2484e-01], + [-1.8225e+00, 1.6838e+00], + [ 4.8661e-01, -3.7628e-01], + [-3.9637e-01, 3.0179e-01], + [-1.8684e-01, 1.2435e-01], + [-6.9133e-02, 8.2504e-02], + [-7.8513e-01, 6.4662e-01], + [-3.8921e-01, 2.5711e-01], + [ 9.4663e-01, -9.2069e-01], + [-1.8838e+00, 1.6530e+00], + [ 8.2019e-01, -9.7471e-01], + [ 9.3414e-01, -9.7474e-01], + [ 2.8287e-01, -3.2549e-01], + [ 1.1875e+00, -1.2406e+00], + [-1.1064e+00, 7.4843e-01], + [ 9.6745e-01, -9.3292e-01], + [-8.0580e-03, -1.0385e-01], + [ 6.2600e-01, -5.7156e-01], + [-7.6923e-01, 5.9834e-01], + [ 5.7483e-01, -5.8466e-01], + [ 5.6202e-02, -1.0682e-01], + [-1.1252e+00, 1.0484e+00], + [ 6.6008e-01, -6.9666e-01], + [-2.2744e-01, 2.2012e-01], + [-3.3884e-01, 2.4878e-01]], device='cuda:0') +06/01/2024 12:43:15 - INFO - __main__ - tensor([[[ 5.4196, 2.7924], + [ 2.7924, 5.3020]], + + [[ 5.5312, 2.9522], + [ 2.9522, 5.3658]], + + [[ 4.4609, 1.6729], + [ 1.6729, 4.4070]], + + [[ 3.1837, 2.3280], + [ 2.3280, 3.1788]], + + [[ 5.4907, 2.1025], + [ 2.1025, 5.3328]], + + [[ 4.5784, 2.4649], + [ 2.4649, 4.4413]], + + [[ 5.3772, 3.3821], + [ 3.3821, 5.2365]], + + [[ 5.2776, 2.7227], + [ 2.7227, 5.1733]], + + [[ 5.4303, 2.3093], + [ 2.3093, 5.2110]], + + [[ 3.2815, 2.1959], + [ 2.1959, 3.2374]], + + [[ 4.5989, 2.7662], + [ 2.7662, 4.5573]], + + [[ 4.7367, 2.6495], + [ 2.6495, 4.6835]], + + [[ 5.0747, 2.9370], + [ 2.9370, 4.9595]], + + [[ 7.2098, 3.7772], + [ 3.7772, 7.0386]], + + [[ 8.8478, 0.6186], + [ 0.6186, 8.1217]], + + [[ 4.2850, 2.5836], + [ 2.5836, 4.2270]], + + [[19.0898, -2.0203], + [-2.0203, 18.4463]], + + [[ 4.9217, 3.3957], + [ 3.3957, 4.8369]], + + [[ 5.3406, 3.0215], + [ 3.0215, 5.2335]], + + [[ 4.9330, 2.2367], + [ 2.2367, 4.6695]], + + [[ 3.0276, 2.3454], + [ 2.3454, 3.0079]], + + [[ 5.5108, 2.8829], + [ 2.8829, 5.3521]], + + [[ 4.4462, 2.8724], + [ 2.8724, 4.3097]], + + [[ 5.6782, 3.0607], + [ 3.0607, 5.4962]], + + [[ 5.2507, 3.3067], + [ 3.3067, 5.1014]], + + [[ 4.8457, 2.7262], + [ 2.7262, 4.7901]], + + [[ 4.7605, 2.8843], + [ 2.8843, 4.6778]], + + [[ 4.4174, 2.8190], + [ 2.8190, 4.3224]], + + [[ 4.3572, 2.2216], + [ 2.2216, 4.1766]], + + [[ 4.5492, 2.8893], + [ 2.8893, 4.4050]], + + [[ 6.0870, 3.3977], + [ 3.3977, 5.9622]], + + [[ 3.1710, 2.2086], + [ 2.2086, 3.1397]], + + [[ 5.3753, 1.5958], + [ 1.5958, 5.1816]], + + [[ 7.2855, 2.0946], + [ 2.0946, 7.0949]], + + [[ 5.4881, 3.2577], + [ 3.2577, 5.5371]], + + [[ 6.3893, 1.2201], + [ 1.2201, 6.1875]], + + [[ 7.0756, 3.7230], + [ 3.7230, 6.8887]], + + [[ 4.5971, 1.9416], + [ 1.9416, 4.3989]], + + [[ 3.1490, 2.2843], + [ 2.2843, 3.1126]], + + [[ 5.1419, 2.9154], + [ 2.9154, 5.0105]], + + [[ 5.3446, 2.5500], + [ 2.5500, 5.1914]], + + [[ 4.6476, 2.7956], + [ 2.7956, 4.5999]], + + [[ 5.9116, 2.2871], + [ 2.2871, 5.6326]], + + [[ 6.3161, 1.8566], + [ 1.8566, 6.0152]], + + [[ 4.8876, 3.0091], + [ 3.0091, 4.8143]], + + [[ 3.2994, 2.4228], + [ 2.4228, 3.2754]], + + [[ 5.1156, 2.4865], + [ 2.4865, 4.9036]], + + [[ 6.7863, 3.7168], + [ 3.7168, 6.6777]], + + [[ 5.4414, 3.2510], + [ 3.2510, 5.2926]], + + [[ 4.7557, 2.5335], + [ 2.5335, 4.6715]], + + [[ 4.8958, 2.6884], + [ 2.6884, 4.7332]], + + [[ 3.0284, 2.2983], + [ 2.2983, 3.0031]], + + [[ 5.5157, 3.0877], + [ 3.0877, 5.4522]], + + [[ 4.8496, 2.0991], + [ 2.0991, 4.6700]], + + [[ 6.6621, 3.0008], + [ 3.0008, 6.4378]], + + [[ 6.5731, 3.6233], + [ 3.6233, 6.4765]], + + [[ 9.0433, 1.6075], + [ 1.6075, 8.4477]], + + [[ 6.0236, 3.4049], + [ 3.4049, 5.9095]], + + [[ 5.6336, 1.9166], + [ 1.9166, 5.2728]], + + [[ 9.8941, 0.1300], + [ 0.1300, 9.3535]], + + [[ 5.8223, 3.2527], + [ 3.2527, 5.6460]], + + [[ 6.7284, 2.9546], + [ 2.9546, 6.4940]], + + [[ 6.2583, 0.4515], + [ 0.4515, 5.6284]], + + [[ 3.7716, 2.4013], + [ 2.4013, 3.7290]], + + [[ 6.0505, 1.8398], + [ 1.8398, 5.7856]], + + [[ 5.4686, 2.4325], + [ 2.4325, 5.2793]], + + [[ 5.2060, 2.8566], + [ 2.8566, 5.0780]], + + [[ 5.0110, 2.3658], + [ 2.3658, 4.8912]], + + [[ 6.0966, 2.9297], + [ 2.9297, 5.8643]], + + [[ 3.1453, 2.4400], + [ 2.4400, 3.1303]], + + [[ 4.5286, 2.9302], + [ 2.9302, 4.3973]]], device='cuda:0') +06/01/2024 12:43:15 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:43:20 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:43:20 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:43:21 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:43:22 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/adapter_config.json +06/01/2024 12:43:22 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:43:22 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_adapter.bin +06/01/2024 12:43:22 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/head_config.json +06/01/2024 12:43:22 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:43:22 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/pytorch_model_head.bin +06/01/2024 12:43:22 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:43:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:43:22 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:43:22 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:43:22 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:43:22 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:43:25 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [0, 133, 343, 1676, 2262, 3179, 5, 14553, 10, 6687, 142, 51, 9741, 1476, 4, 2, 2, 133, 343, 1676, 2262, 9741, 1476, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:43:25 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [0, 19993, 1595, 5, 177, 9902, 7, 610, 142, 39, 1004, 21, 220, 4, 2, 2, 19993, 18, 1004, 21, 220, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:43:25 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [0, 133, 1150, 2584, 5, 8416, 2143, 11, 39, 3701, 4, 2, 2, 133, 1150, 2584, 5, 8416, 2143, 11, 5, 2143, 18, 3701, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:44:07 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:44:07 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:44:07 - INFO - __main__ - tensor([[-1.2438, 1.1851], + [-1.1913, 1.0224], + [ 1.3718, -1.4265], + [-0.1603, 0.1493], + [ 2.0318, -1.8870], + [-2.4498, 2.1625], + [-0.3757, 0.2060], + [ 0.0652, -0.1731], + [-4.7646, 4.3950], + [-1.0295, 0.9890], + [-2.3345, 2.0926], + [-0.8792, 0.7774], + [-3.4578, 3.0776], + [-4.9532, 4.7789], + [-3.5654, 3.2977], + [-2.6600, 2.3920], + [ 5.6597, -5.4646], + [-1.7992, 1.7737], + [ 2.4906, -2.3909], + [-0.7098, 0.6060], + [-1.3729, 1.3327], + [-4.4244, 4.1914], + [-0.5853, 0.5388], + [ 1.7534, -1.7625], + [-1.5776, 1.4199], + [-0.4180, 0.2304], + [-0.0133, -0.0307], + [-0.9436, 0.7655], + [-2.8085, 2.7219], + [-0.2901, 0.2379], + [ 2.2589, -2.1590], + [-0.5504, 0.5849], + [-0.3369, 0.2473], + [ 2.7605, -2.8232], + [-4.0071, 3.9280], + [ 3.8413, -3.6885], + [-5.0925, 4.9319], + [ 0.8260, -0.7363], + [-2.4587, 2.3564], + [-2.3424, 2.2544], + [-2.1619, 2.0612], + [-1.4216, 1.4824], + [ 1.5184, -1.5075], + [ 0.9852, -0.9888], + [-2.2618, 2.0993], + [ 4.2948, -4.1065], + [ 3.4809, -3.1310], + [-4.0414, 3.8228], + [-0.2245, 0.3579], + [-3.1092, 2.9790], + [-3.6566, 3.4346], + [-2.7179, 2.6081], + [-2.3066, 2.1852], + [-1.5694, 1.3550], + [ 1.6332, -1.5460], + [-6.0553, 5.7436], + [ 5.3999, -5.3415], + [ 1.7327, -1.6415], + [-0.0474, 0.0650], + [ 4.9318, -4.7368], + [-4.3810, 3.9747], + [ 2.1440, -2.0273], + [-0.0825, 0.1042], + [ 2.1562, -2.0255], + [-2.2978, 2.0989], + [ 1.7588, -1.6652], + [ 0.0816, -0.1039], + [-4.7880, 4.5865], + [ 3.4206, -3.2555], + [-2.1845, 2.1212], + [-1.0545, 0.9867]], device='cuda:0') +06/01/2024 12:44:07 - INFO - __main__ - tensor([[[ 1.9253e+01, -1.2690e+01], + [-1.2690e+01, 1.8147e+01]], + + [[ 1.0021e+01, -4.2728e+00], + [-4.2729e+00, 9.3458e+00]], + + [[ 1.7439e+01, -1.1984e+01], + [-1.1984e+01, 1.5718e+01]], + + [[ 2.0514e+01, -1.6531e+01], + [-1.6531e+01, 1.9336e+01]], + + [[ 4.9435e+01, -4.1110e+01], + [-4.1110e+01, 4.4852e+01]], + + [[ 9.6058e+00, -3.2335e+00], + [-3.2335e+00, 9.2245e+00]], + + [[ 1.5021e+01, -9.0083e+00], + [-9.0083e+00, 1.3046e+01]], + + [[ 9.6372e+00, -4.8272e+00], + [-4.8272e+00, 8.7067e+00]], + + [[ 1.7832e+01, -3.5004e+00], + [-3.5004e+00, 1.6849e+01]], + + [[ 1.5713e+01, -1.1294e+01], + [-1.1294e+01, 1.4534e+01]], + + [[ 9.4922e+00, -2.4156e+00], + [-2.4156e+00, 8.9622e+00]], + + [[ 2.6772e+01, -2.1067e+01], + [-2.1067e+01, 2.4526e+01]], + + [[ 9.0701e+00, -3.6769e-01], + [-3.6769e-01, 8.8483e+00]], + + [[ 1.4135e+01, 1.2548e+00], + [ 1.2548e+00, 1.3527e+01]], + + [[ 2.6611e+01, -1.5847e+01], + [-1.5847e+01, 2.5502e+01]], + + [[ 8.4412e+00, -1.7199e+00], + [-1.7199e+00, 8.3622e+00]], + + [[ 1.1686e+01, 8.3298e+00], + [ 8.3298e+00, 1.1532e+01]], + + [[ 7.8380e+00, -1.2608e+00], + [-1.2608e+00, 7.2837e+00]], + + [[ 2.4754e+01, -1.5778e+01], + [-1.5778e+01, 2.1871e+01]], + + [[ 1.4154e+01, -9.2872e+00], + [-9.2872e+00, 1.2214e+01]], + + [[ 8.1396e+00, -3.2802e+00], + [-3.2802e+00, 7.6320e+00]], + + [[ 1.1441e+01, 7.4360e-01], + [ 7.4360e-01, 1.1196e+01]], + + [[ 5.9907e+00, -1.5828e+00], + [-1.5828e+00, 5.4955e+00]], + + [[ 2.5307e+01, -1.7377e+01], + [-1.7377e+01, 2.2182e+01]], + + [[ 7.5276e+00, -6.9771e-01], + [-6.9771e-01, 7.0285e+00]], + + [[ 5.2450e+00, -1.4850e+00], + [-1.4850e+00, 5.0337e+00]], + + [[ 2.0626e+01, -1.5479e+01], + [-1.5479e+01, 1.8352e+01]], + + [[ 5.1083e+00, -7.2196e-01], + [-7.2196e-01, 4.7227e+00]], + + [[ 1.0955e+01, -2.7710e+00], + [-2.7710e+00, 1.0524e+01]], + + [[ 6.8466e+00, -2.2875e+00], + [-2.2875e+00, 6.2211e+00]], + + [[ 1.0958e+01, -3.1540e+00], + [-3.1540e+00, 1.0036e+01]], + + [[ 2.4878e+01, -2.0360e+01], + [-2.0360e+01, 2.2910e+01]], + + [[ 2.5195e+01, -2.0123e+01], + [-2.0123e+01, 2.2942e+01]], + + [[ 1.3684e+01, -4.4299e+00], + [-4.4299e+00, 1.2821e+01]], + + [[ 1.2510e+01, -1.7088e+00], + [-1.7087e+00, 1.2359e+01]], + + [[ 2.3099e+01, -1.0276e+01], + [-1.0276e+01, 2.2319e+01]], + + [[ 1.3425e+01, 2.4868e+00], + [ 2.4868e+00, 1.2740e+01]], + + [[ 3.4390e+01, -2.8591e+01], + [-2.8591e+01, 3.0865e+01]], + + [[ 7.9280e+00, -1.1058e+00], + [-1.1058e+00, 7.5333e+00]], + + [[ 6.3778e+00, 7.3518e-01], + [ 7.3518e-01, 6.2500e+00]], + + [[ 6.7479e+00, -3.2609e-02], + [-3.2610e-02, 6.5574e+00]], + + [[ 1.7961e+01, -1.2379e+01], + [-1.2379e+01, 1.6709e+01]], + + [[ 1.8117e+02, -1.6311e+02], + [-1.6311e+02, 1.5673e+02]], + + [[ 1.8773e+01, -1.2588e+01], + [-1.2588e+01, 1.6451e+01]], + + [[ 1.7784e+01, -1.0366e+01], + [-1.0366e+01, 1.6313e+01]], + + [[ 4.0924e+01, -2.5081e+01], + [-2.5081e+01, 3.8238e+01]], + + [[ 1.1484e+02, -9.9143e+01], + [-9.9143e+01, 1.0474e+02]], + + [[ 7.0759e+01, -5.6290e+01], + [-5.6290e+01, 6.4958e+01]], + + [[ 8.8728e+00, -3.4567e+00], + [-3.4567e+00, 8.2038e+00]], + + [[ 1.0379e+01, -1.3934e+00], + [-1.3934e+00, 9.9514e+00]], + + [[ 1.3769e+01, -2.8110e+00], + [-2.8110e+00, 1.3189e+01]], + + [[ 1.1817e+01, -3.7261e+00], + [-3.7261e+00, 1.0995e+01]], + + [[ 1.3168e+01, -5.9157e+00], + [-5.9158e+00, 1.2938e+01]], + + [[ 7.9221e+00, -3.1499e+00], + [-3.1499e+00, 7.3754e+00]], + + [[ 1.4404e+01, -7.1941e+00], + [-7.1941e+00, 1.2535e+01]], + + [[ 1.4036e+01, 5.7520e+00], + [ 5.7520e+00, 1.4018e+01]], + + [[ 2.4299e+01, -4.3870e+00], + [-4.3870e+00, 2.2343e+01]], + + [[ 1.1950e+01, -4.7780e+00], + [-4.7780e+00, 1.0716e+01]], + + [[ 1.9385e+01, -1.3689e+01], + [-1.3689e+01, 1.6485e+01]], + + [[ 4.0199e+01, -2.3078e+01], + [-2.3078e+01, 3.8789e+01]], + + [[ 1.1722e+01, 6.3749e-01], + [ 6.3750e-01, 1.1947e+01]], + + [[ 1.7698e+01, -9.3163e+00], + [-9.3163e+00, 1.5432e+01]], + + [[ 3.1238e+01, -2.6089e+01], + [-2.6089e+01, 2.8475e+01]], + + [[ 2.7947e+01, -2.0503e+01], + [-2.0503e+01, 2.5706e+01]], + + [[ 1.1164e+01, -4.6384e+00], + [-4.6384e+00, 1.0495e+01]], + + [[ 2.4399e+01, -1.7127e+01], + [-1.7127e+01, 2.2092e+01]], + + [[ 1.2454e+01, -6.6578e+00], + [-6.6578e+00, 1.1192e+01]], + + [[ 2.4324e+01, -9.4750e+00], + [-9.4750e+00, 2.2838e+01]], + + [[ 3.3398e+01, -2.1644e+01], + [-2.1644e+01, 3.0677e+01]], + + [[ 9.0396e+00, -2.4659e+00], + [-2.4659e+00, 8.6748e+00]], + + [[ 7.3926e+00, -2.3884e+00], + [-2.3884e+00, 6.7720e+00]]], device='cuda:0') +06/01/2024 12:44:08 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:44:12 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:44:12 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:44:13 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:44:14 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/adapter_config.json +06/01/2024 12:44:14 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:44:14 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_adapter.bin +06/01/2024 12:44:14 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/head_config.json +06/01/2024 12:44:14 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:44:14 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/pytorch_model_head.bin +06/01/2024 12:44:14 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:44:14 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:44:14 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:44:14 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:44:14 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:44:14 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:44:18 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [0, 133, 343, 1676, 2262, 3179, 5, 14553, 10, 6687, 142, 51, 9741, 1476, 4, 2, 2, 133, 343, 1676, 2262, 9741, 1476, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:44:18 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [0, 19993, 1595, 5, 177, 9902, 7, 610, 142, 39, 1004, 21, 220, 4, 2, 2, 19993, 18, 1004, 21, 220, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:44:18 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [0, 133, 1150, 2584, 5, 8416, 2143, 11, 39, 3701, 4, 2, 2, 133, 1150, 2584, 5, 8416, 2143, 11, 5, 2143, 18, 3701, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:45:02 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:45:02 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:45:02 - INFO - __main__ - tensor([[-2.3734, 2.3152], + [-2.6693, 2.4772], + [ 2.1340, -2.1342], + [ 0.5885, -0.5739], + [ 4.3195, -4.0464], + [-5.0618, 4.7688], + [ 1.6453, -1.6535], + [ 0.1194, -0.1989], + [-5.8391, 5.4700], + [-0.7546, 0.7157], + [-4.4447, 4.1409], + [-2.0977, 1.9606], + [-6.1110, 5.7249], + [-6.3258, 6.1003], + [-5.8301, 5.5814], + [-4.4496, 4.1961], + [ 5.3114, -5.1320], + [-2.8633, 2.7981], + [ 3.7978, -3.6261], + [-2.1512, 1.9902], + [-1.8677, 1.8070], + [-6.2575, 5.9758], + [ 0.1197, -0.0897], + [ 2.3530, -2.3521], + [-3.4148, 3.2102], + [-0.9454, 0.7543], + [-1.2350, 1.1495], + [-1.7371, 1.5180], + [-5.3294, 5.2207], + [ 1.1123, -1.0587], + [ 3.2981, -3.1301], + [-0.3469, 0.3973], + [-1.9203, 1.7647], + [ 3.6089, -3.6355], + [-6.1767, 6.0368], + [ 5.1550, -4.9919], + [-6.3514, 6.1442], + [-0.0172, 0.0775], + [-4.2947, 4.0934], + [-3.9677, 3.8611], + [-3.6170, 3.5000], + [-1.7731, 1.8537], + [ 3.0136, -2.9482], + [ 0.8598, -0.8997], + [-4.0092, 3.8267], + [ 3.0747, -2.9668], + [ 1.3565, -1.1639], + [-6.2447, 5.9740], + [ 1.8711, -1.5791], + [-5.3498, 5.1427], + [-5.8442, 5.5795], + [-5.0207, 4.7836], + [-5.3603, 5.2024], + [-2.1920, 1.9604], + [ 2.0361, -1.9085], + [-5.6950, 5.4646], + [ 5.5960, -5.4724], + [ 2.5386, -2.3972], + [-1.7417, 1.6562], + [ 5.3813, -5.1819], + [-6.1513, 5.8364], + [ 3.0488, -2.8765], + [ 2.8129, -2.6703], + [ 3.7721, -3.6068], + [-4.0500, 3.7756], + [ 1.9392, -1.8133], + [ 1.0592, -1.0120], + [-5.7550, 5.5482], + [ 5.2332, -5.0216], + [-4.4285, 4.2937], + [-0.5517, 0.5523]], device='cuda:0') +06/01/2024 12:45:02 - INFO - __main__ - tensor([[[ 58.6316, -47.0908], + [ -47.0907, 55.1318]], + + [[ 41.5202, -30.5076], + [ -30.5076, 38.9411]], + + [[ 50.1614, -40.5058], + [ -40.5058, 44.2524]], + + [[ 75.2841, -68.0750], + [ -68.0750, 69.7023]], + + [[ 109.7720, -91.1152], + [ -91.1152, 101.4272]], + + [[ 29.9033, -12.6032], + [ -12.6032, 29.7234]], + + [[ 71.2761, -59.4279], + [ -59.4279, 62.2078]], + + [[ 31.9636, -25.0172], + [ -25.0173, 28.5638]], + + [[ 14.7130, 5.3209], + [ 5.3209, 13.9072]], + + [[ 70.7645, -63.4467], + [ -63.4467, 65.2435]], + + [[ 22.3345, -6.8636], + [ -6.8636, 21.8361]], + + [[ 68.6943, -59.1313], + [ -59.1313, 64.2235]], + + [[ 17.5271, 4.4552], + [ 4.4552, 17.7854]], + + [[ 12.8015, 11.0781], + [ 11.0781, 12.7735]], + + [[ 13.1539, 7.8863], + [ 7.8863, 12.8799]], + + [[ 34.7351, -20.2642], + [ -20.2641, 33.7947]], + + [[ 11.9338, 6.9473], + [ 6.9473, 11.8147]], + + [[ 24.5658, -13.6693], + [ -13.6693, 22.7059]], + + [[ 58.4241, -42.3019], + [ -42.3020, 53.0221]], + + [[ 48.8177, -39.8837], + [ -39.8837, 43.9337]], + + [[ 147.7398, -133.5424], + [-133.5424, 135.1041]], + + [[ 15.8502, 7.6580], + [ 7.6580, 15.8307]], + + [[ 27.0421, -21.2855], + [ -21.2856, 24.4112]], + + [[ 52.8914, -41.6512], + [ -41.6512, 47.8518]], + + [[ 25.1393, -11.9030], + [ -11.9030, 24.4417]], + + [[ 17.2509, -12.4162], + [ -12.4162, 16.5515]], + + [[ 19.7517, -14.0952], + [ -14.0952, 18.4920]], + + [[ 11.3053, -4.7267], + [ -4.7267, 10.6444]], + + [[ 19.0246, 1.1547], + [ 1.1547, 18.1152]], + + [[ 25.9316, -19.2392], + [ -19.2392, 22.9529]], + + [[ 27.8642, -15.1603], + [ -15.1604, 25.0629]], + + [[ 81.8513, -74.2664], + [ -74.2664, 75.5958]], + + [[ 193.1199, -180.8360], + [-180.8359, 182.2395]], + + [[ 26.5785, -12.1299], + [ -12.1299, 24.4439]], + + [[ 16.8310, 6.7265], + [ 6.7265, 16.2020]], + + [[ 19.2811, 0.7273], + [ 0.7273, 19.4289]], + + [[ 13.0825, 11.1303], + [ 11.1303, 13.0251]], + + [[ 91.2415, -83.7443], + [ -83.7443, 85.6507]], + + [[ 35.2517, -17.9260], + [ -17.9259, 32.3152]], + + [[ 17.5251, -3.8713], + [ -3.8713, 17.1294]], + + [[ 18.5814, -6.3321], + [ -6.3321, 18.0508]], + + [[ 166.0098, -154.7070], + [-154.7070, 156.2176]], + + [[ 554.1421, -512.8062], + [-512.8065, 494.0856]], + + [[ 41.2154, -32.7674], + [ -32.7673, 35.6681]], + + [[ 77.4089, -61.3046], + [ -61.3046, 72.7787]], + + [[ 176.6129, -159.0269], + [-159.0271, 163.3616]], + + [[ 279.0750, -252.6055], + [-252.6055, 241.2155]], + + [[ 13.8944, 8.9691], + [ 8.9691, 13.7209]], + + [[ 59.0305, -48.6387], + [ -48.6387, 53.6854]], + + [[ 22.8731, -2.6588], + [ -2.6588, 21.6981]], + + [[ 12.6138, 8.5968], + [ 8.5968, 12.5848]], + + [[ 17.9688, 1.8215], + [ 1.8215, 17.3111]], + + [[ 23.6657, -4.0010], + [ -4.0010, 23.3156]], + + [[ 21.2486, -14.0232], + [ -14.0232, 19.6744]], + + [[ 34.6170, -24.5704], + [ -24.5704, 30.2356]], + + [[ 16.6254, 2.7463], + [ 2.7463, 15.8227]], + + [[ 13.0180, 7.2269], + [ 7.2269, 13.3839]], + + [[ 33.4208, -22.2579], + [ -22.2579, 29.4142]], + + [[ 34.1427, -25.7488], + [ -25.7488, 30.6477]], + + [[ 58.4289, -37.1113], + [ -37.1114, 57.2028]], + + [[ 13.0617, 9.7185], + [ 9.7185, 13.2593]], + + [[ 47.9250, -34.6681], + [ -34.6681, 42.7326]], + + [[ 84.8753, -72.5677], + [ -72.5677, 79.5807]], + + [[ 72.6410, -56.2821], + [ -56.2821, 67.0502]], + + [[ 31.9375, -18.1382], + [ -18.1382, 30.2671]], + + [[ 86.0474, -74.7233], + [ -74.7234, 78.4838]], + + [[ 60.8025, -50.4158], + [ -50.4158, 54.1241]], + + [[ 18.2592, 1.9508], + [ 1.9508, 17.5922]], + + [[ 35.4757, -14.8821], + [ -14.8821, 34.3570]], + + [[ 25.7048, -7.6104], + [ -7.6104, 24.0447]], + + [[ 31.5246, -24.4228], + [ -24.4228, 27.8581]]], device='cuda:0') +06/01/2024 12:45:02 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:45:07 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:45:07 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:45:08 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:45:09 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/adapter_config.json +06/01/2024 12:45:09 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:45:09 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_adapter.bin +06/01/2024 12:45:09 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/head_config.json +06/01/2024 12:45:09 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:45:09 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/pytorch_model_head.bin +06/01/2024 12:45:09 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:45:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:45:09 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:45:09 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:45:09 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:45:09 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:45:13 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [0, 133, 343, 1676, 2262, 3179, 5, 14553, 10, 6687, 142, 51, 9741, 1476, 4, 2, 2, 133, 343, 1676, 2262, 9741, 1476, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:45:13 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [0, 19993, 1595, 5, 177, 9902, 7, 610, 142, 39, 1004, 21, 220, 4, 2, 2, 19993, 18, 1004, 21, 220, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:45:13 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [0, 133, 1150, 2584, 5, 8416, 2143, 11, 39, 3701, 4, 2, 2, 133, 1150, 2584, 5, 8416, 2143, 11, 5, 2143, 18, 3701, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:45:55 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:45:55 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:45:55 - INFO - __main__ - tensor([[-3.6697, 3.6352], + [-4.2246, 4.0405], + [ 2.6616, -2.6700], + [-0.4207, 0.4255], + [ 6.0653, -5.8317], + [-6.5317, 6.3038], + [ 3.8945, -3.8085], + [-0.4359, 0.3615], + [-5.1009, 4.8762], + [-0.5142, 0.5098], + [-5.5476, 5.2919], + [-0.1034, 0.0712], + [-6.8260, 6.5431], + [-6.1620, 6.0179], + [-5.4300, 5.2809], + [-6.2325, 5.9906], + [ 5.2893, -5.1639], + [-4.4324, 4.3628], + [ 5.1583, -5.0027], + [-3.1700, 2.9834], + [-4.4366, 4.3196], + [-6.5055, 6.3138], + [ 0.4570, -0.3993], + [ 3.3525, -3.3523], + [-5.1196, 4.9516], + [-0.9428, 0.7652], + [-1.1390, 1.0968], + [-2.8406, 2.6141], + [-6.1150, 6.0317], + [ 2.9434, -2.8087], + [ 3.7029, -3.5339], + [ 0.6407, -0.5287], + [-3.8132, 3.6467], + [ 3.8627, -3.9039], + [-6.6655, 6.5515], + [ 5.6844, -5.5658], + [-6.3700, 6.2371], + [ 2.0129, -1.8867], + [-5.4174, 5.2369], + [-5.4805, 5.4054], + [-5.0058, 4.9114], + [-2.9092, 2.9904], + [ 5.8719, -5.7513], + [ 1.0980, -1.1256], + [-3.7832, 3.6815], + [ 3.5065, -3.3963], + [-0.2126, 0.3464], + [-5.7800, 5.6021], + [ 3.3152, -2.9734], + [-6.0724, 5.8946], + [-5.3324, 5.1837], + [-5.1201, 4.9717], + [-6.0972, 5.9778], + [-2.1916, 1.9824], + [ 2.1938, -2.0780], + [-5.1398, 5.0149], + [ 5.6746, -5.6002], + [ 2.8161, -2.6625], + [-2.5494, 2.4574], + [ 6.1326, -5.9852], + [-6.1049, 5.8821], + [ 3.1275, -2.9882], + [ 1.9537, -1.8577], + [ 4.7487, -4.5946], + [-4.3404, 4.1182], + [ 3.1361, -2.9896], + [ 2.9287, -2.7833], + [-5.2242, 5.0996], + [ 5.9291, -5.7693], + [-5.1409, 5.0372], + [-0.3020, 0.3336]], device='cuda:0') +06/01/2024 12:45:55 - INFO - __main__ - tensor([[[ 1.5678e+02, -1.3558e+02], + [-1.3558e+02, 1.4960e+02]], + + [[ 7.3653e+01, -5.4208e+01], + [-5.4208e+01, 7.2585e+01]], + + [[ 1.2172e+02, -1.0609e+02], + [-1.0609e+02, 1.1035e+02]], + + [[ 1.4741e+02, -1.3753e+02], + [-1.3753e+02, 1.4022e+02]], + + [[ 4.2369e+01, -1.3782e+01], + [-1.3782e+01, 4.1731e+01]], + + [[ 1.6438e+01, 1.3553e+01], + [ 1.3553e+01, 1.6576e+01]], + + [[ 1.7085e+02, -1.4770e+02], + [-1.4770e+02, 1.5894e+02]], + + [[ 8.5229e+01, -7.4587e+01], + [-7.4587e+01, 7.8713e+01]], + + [[ 1.6429e+01, 1.6915e+00], + [ 1.6915e+00, 1.5588e+01]], + + [[ 1.9472e+02, -1.8219e+02], + [-1.8219e+02, 1.8198e+02]], + + [[ 2.9322e+01, -4.5729e+00], + [-4.5729e+00, 2.9574e+01]], + + [[ 4.5577e+02, -4.3253e+02], + [-4.3253e+02, 4.2325e+02]], + + [[ 1.7528e+01, 1.4306e+01], + [ 1.4306e+01, 1.7499e+01]], + + [[ 1.9660e+01, 6.9344e+00], + [ 6.9344e+00, 1.9072e+01]], + + [[ 2.0581e+01, 9.4826e-01], + [ 9.4827e-01, 1.9553e+01]], + + [[ 3.0820e+01, -2.8126e+00], + [-2.8126e+00, 3.0894e+01]], + + [[ 1.4417e+01, 7.3063e+00], + [ 7.3063e+00, 1.4258e+01]], + + [[ 5.6967e+01, -3.6052e+01], + [-3.6052e+01, 5.3749e+01]], + + [[ 6.2771e+01, -3.7740e+01], + [-3.7740e+01, 6.0511e+01]], + + [[ 1.8312e+02, -1.6489e+02], + [-1.6489e+02, 1.7007e+02]], + + [[ 1.0785e+02, -8.1547e+01], + [-8.1547e+01, 9.9607e+01]], + + [[ 2.0783e+01, 8.9680e+00], + [ 8.9680e+00, 1.9856e+01]], + + [[ 1.1239e+02, -1.0246e+02], + [-1.0246e+02, 1.0450e+02]], + + [[ 1.2250e+02, -1.0403e+02], + [-1.0403e+02, 1.1439e+02]], + + [[ 2.8105e+01, -4.9362e+00], + [-4.9362e+00, 2.8607e+01]], + + [[ 5.0390e+01, -4.3516e+01], + [-4.3516e+01, 4.8656e+01]], + + [[ 7.2610e+01, -6.4001e+01], + [-6.4001e+01, 6.8327e+01]], + + [[ 3.1991e+01, -2.0412e+01], + [-2.0412e+01, 3.0953e+01]], + + [[ 1.6111e+01, 1.1455e+01], + [ 1.1455e+01, 1.5867e+01]], + + [[ 1.0307e+02, -8.7739e+01], + [-8.7739e+01, 9.4961e+01]], + + [[ 5.1204e+01, -3.3637e+01], + [-3.3637e+01, 4.7160e+01]], + + [[ 1.4071e+02, -1.3006e+02], + [-1.3006e+02, 1.3135e+02]], + + [[ 6.6428e+02, -6.4904e+02], + [-6.4904e+02, 6.6509e+02]], + + [[ 4.7017e+01, -2.8070e+01], + [-2.8070e+01, 4.3996e+01]], + + [[ 1.7251e+01, 1.3965e+01], + [ 1.3965e+01, 1.7127e+01]], + + [[ 1.8577e+01, 8.5181e+00], + [ 8.5181e+00, 1.8809e+01]], + + [[ 2.0975e+01, 7.4333e+00], + [ 7.4333e+00, 2.0457e+01]], + + [[ 2.4361e+02, -2.2752e+02], + [-2.2752e+02, 2.2802e+02]], + + [[ 1.6074e+01, 9.3624e+00], + [ 9.3624e+00, 1.5947e+01]], + + [[ 2.9728e+01, -4.7859e+00], + [-4.7859e+00, 2.9166e+01]], + + [[ 4.4085e+01, -2.1539e+01], + [-2.1539e+01, 4.2835e+01]], + + [[ 6.7386e+02, -6.4755e+02], + [-6.4755e+02, 6.4461e+02]], + + [[ 3.6885e+01, -6.7025e+00], + [-6.7026e+00, 3.3047e+01]], + + [[ 9.9326e+01, -8.5463e+01], + [-8.5463e+01, 8.7261e+01]], + + [[ 5.4326e+02, -5.1473e+02], + [-5.1473e+02, 5.1977e+02]], + + [[ 3.2823e+02, -3.0353e+02], + [-3.0353e+02, 3.0915e+02]], + + [[ 9.7619e+02, -9.1273e+02], + [-9.1273e+02, 8.7041e+02]], + + [[ 2.1452e+01, 1.9222e+00], + [ 1.9222e+00, 2.0731e+01]], + + [[ 1.0526e+02, -8.9273e+01], + [-8.9273e+01, 1.0142e+02]], + + [[ 1.6379e+01, 1.1271e+01], + [ 1.1271e+01, 1.6244e+01]], + + [[ 1.8761e+01, 1.5665e+00], + [ 1.5665e+00, 1.8127e+01]], + + [[ 1.9979e+01, 1.9925e+00], + [ 1.9925e+00, 1.8965e+01]], + + [[ 1.7761e+01, 9.1459e+00], + [ 9.1459e+00, 1.7482e+01]], + + [[ 6.3778e+01, -5.3312e+01], + [-5.3312e+01, 5.9399e+01]], + + [[ 6.8992e+01, -5.5594e+01], + [-5.5594e+01, 6.2782e+01]], + + [[ 1.9176e+01, -4.5902e-01], + [-4.5903e-01, 1.8388e+01]], + + [[ 1.8741e+01, 5.3605e+00], + [ 5.3605e+00, 1.9663e+01]], + + [[ 6.5440e+01, -4.9975e+01], + [-4.9975e+01, 5.9260e+01]], + + [[ 8.9870e+01, -7.4983e+01], + [-7.4983e+01, 8.1911e+01]], + + [[ 1.6475e+01, 1.2153e+01], + [ 1.2153e+01, 1.6736e+01]], + + [[ 2.2909e+01, 3.7129e+00], + [ 3.7129e+00, 2.1622e+01]], + + [[ 8.8434e+01, -7.1644e+01], + [-7.1644e+01, 8.1576e+01]], + + [[ 1.8339e+02, -1.7143e+02], + [-1.7143e+02, 1.7748e+02]], + + [[ 9.1505e+01, -6.7208e+01], + [-6.7208e+01, 8.7061e+01]], + + [[ 7.8614e+01, -5.9083e+01], + [-5.9083e+01, 7.5318e+01]], + + [[ 2.0310e+02, -1.8344e+02], + [-1.8344e+02, 1.9097e+02]], + + [[ 1.4230e+02, -1.2341e+02], + [-1.2341e+02, 1.3204e+02]], + + [[ 2.3033e+01, -3.3435e+00], + [-3.3435e+00, 2.2267e+01]], + + [[ 1.7061e+01, 1.0325e+01], + [ 1.0325e+01, 1.6816e+01]], + + [[ 1.4926e+01, 9.3028e+00], + [ 9.3028e+00, 1.4665e+01]], + + [[ 1.1173e+02, -9.9595e+01], + [-9.9595e+01, 1.0180e+02]]], device='cuda:0') +06/01/2024 12:45:55 - INFO - __main__ - ***** Completed training ***** +06/01/2024 12:45:59 - INFO - __main__ - Number of labels detected = 2 +06/01/2024 12:45:59 - INFO - __main__ - ***** Starting script ***** +06/01/2024 12:46:01 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}. +06/01/2024 12:46:01 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/adapter_config.json +06/01/2024 12:46:01 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'wnli'. +06/01/2024 12:46:02 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_adapter.bin +06/01/2024 12:46:02 - INFO - adapters.loading - Loading module configuration from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/head_config.json +06/01/2024 12:46:02 - INFO - adapters.heads.model_mixin - Adding head 'wnli' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}. +06/01/2024 12:46:02 - INFO - adapters.loading - Loading module weights from ./outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/pytorch_model_head.bin +06/01/2024 12:46:02 - INFO - __main__ - Adapter Name = wnli +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_down.0.bias +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.weight +06/01/2024 12:46:02 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.wnli.adapter_up.bias +06/01/2024 12:46:02 - INFO - __main__ - heads.wnli.1.weight +06/01/2024 12:46:02 - INFO - __main__ - heads.wnli.1.bias +06/01/2024 12:46:02 - INFO - __main__ - heads.wnli.4.weight +06/01/2024 12:46:02 - INFO - __main__ - heads.wnli.4.bias +06/01/2024 12:46:06 - INFO - __main__ - Sample 212 of the training set: {'input_ids': [0, 133, 343, 1676, 2262, 3179, 5, 14553, 10, 6687, 142, 51, 9741, 1476, 4, 2, 2, 133, 343, 1676, 2262, 9741, 1476, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}. +06/01/2024 12:46:06 - INFO - __main__ - Sample 147 of the training set: {'input_ids': [0, 19993, 1595, 5, 177, 9902, 7, 610, 142, 39, 1004, 21, 220, 4, 2, 2, 19993, 18, 1004, 21, 220, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:46:06 - INFO - __main__ - Sample 263 of the training set: {'input_ids': [0, 133, 1150, 2584, 5, 8416, 2143, 11, 39, 3701, 4, 2, 2, 133, 1150, 2584, 5, 8416, 2143, 11, 5, 2143, 18, 3701, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}. +06/01/2024 12:46:48 - INFO - __main__ - f_mu shape : torch.Size([71, 2]) +06/01/2024 12:46:48 - INFO - __main__ - f_var shape : torch.Size([71, 2, 2]) +06/01/2024 12:46:48 - INFO - __main__ - tensor([[-4.1948, 4.1521], + [-3.9235, 3.7475], + [ 2.7918, -2.7773], + [-0.3957, 0.4020], + [ 5.5706, -5.3296], + [-6.4246, 6.1942], + [ 3.4758, -3.3889], + [-0.5247, 0.4619], + [-4.8835, 4.6704], + [-0.2999, 0.3041], + [-5.3559, 5.0989], + [-1.0159, 0.9557], + [-6.7311, 6.4538], + [-6.0829, 5.9357], + [-5.3050, 5.1600], + [-5.9572, 5.7143], + [ 4.9291, -4.8109], + [-4.5400, 4.4694], + [ 5.1163, -4.9506], + [-3.2082, 3.0232], + [-2.8064, 2.7535], + [-6.3700, 6.1831], + [ 0.8871, -0.7985], + [ 3.3241, -3.3139], + [-5.1763, 5.0130], + [-1.1824, 1.0123], + [-1.3765, 1.3347], + [-2.8929, 2.6715], + [-6.0371, 5.9566], + [ 3.2774, -3.1176], + [ 3.9032, -3.7125], + [ 1.3809, -1.2367], + [-4.2460, 4.0799], + [ 4.0037, -4.0384], + [-6.4887, 6.3756], + [ 5.5597, -5.4395], + [-6.2465, 6.1108], + [ 1.3193, -1.1992], + [-5.2360, 5.0519], + [-5.4160, 5.3383], + [-5.1011, 5.0022], + [-3.0719, 3.1655], + [ 5.6189, -5.4751], + [ 1.0337, -1.0436], + [-4.6815, 4.5503], + [ 3.2786, -3.1774], + [ 0.6436, -0.4504], + [-5.6175, 5.4401], + [ 3.7776, -3.4085], + [-5.9687, 5.7962], + [-5.2303, 5.0854], + [-5.0567, 4.9014], + [-6.0189, 5.8971], + [-2.4915, 2.2751], + [ 1.7467, -1.6322], + [-5.0582, 4.9358], + [ 5.2805, -5.2006], + [ 2.9379, -2.7646], + [-2.6141, 2.5181], + [ 5.9038, -5.7612], + [-5.9966, 5.7756], + [ 2.6726, -2.5321], + [ 1.6857, -1.5794], + [ 5.0465, -4.8843], + [-4.6609, 4.4268], + [ 2.6276, -2.4852], + [ 2.9757, -2.8155], + [-5.2663, 5.1421], + [ 5.7075, -5.5494], + [-5.0099, 4.9039], + [-0.1593, 0.2164]], device='cuda:0') +06/01/2024 12:46:48 - INFO - __main__ - tensor([[[ 1.3041e+02, -1.0549e+02], + [-1.0549e+02, 1.2371e+02]], + + [[ 7.7824e+01, -5.7689e+01], + [-5.7689e+01, 7.6018e+01]], + + [[ 1.3846e+02, -1.2026e+02], + [-1.2026e+02, 1.2544e+02]], + + [[ 1.6119e+02, -1.5010e+02], + [-1.5010e+02, 1.5271e+02]], + + [[ 1.1410e+02, -8.5075e+01], + [-8.5075e+01, 1.1193e+02]], + + [[ 1.7818e+01, 1.4477e+01], + [ 1.4477e+01, 1.7968e+01]], + + [[ 2.1327e+02, -1.8811e+02], + [-1.8811e+02, 1.9748e+02]], + + [[ 9.5771e+01, -8.3732e+01], + [-8.3732e+01, 8.8112e+01]], + + [[ 1.6854e+01, 1.6320e+00], + [ 1.6320e+00, 1.5990e+01]], + + [[ 1.9513e+02, -1.8198e+02], + [-1.8198e+02, 1.8180e+02]], + + [[ 3.4047e+01, -7.9809e+00], + [-7.9809e+00, 3.4203e+01]], + + [[ 4.3573e+02, -4.1369e+02], + [-4.1369e+02, 4.0794e+02]], + + [[ 1.9275e+01, 1.5076e+01], + [ 1.5076e+01, 1.9196e+01]], + + [[ 2.1621e+01, 7.2141e+00], + [ 7.2141e+00, 2.0930e+01]], + + [[ 2.2299e+01, 7.1611e-01], + [ 7.1610e-01, 2.1083e+01]], + + [[ 4.3542e+01, -1.4525e+01], + [-1.4525e+01, 4.3248e+01]], + + [[ 1.4044e+01, 7.1813e+00], + [ 7.1813e+00, 1.3877e+01]], + + [[ 6.4321e+01, -4.0551e+01], + [-4.0551e+01, 6.0516e+01]], + + [[ 6.3479e+01, -3.5972e+01], + [-3.5972e+01, 6.1594e+01]], + + [[ 1.9644e+02, -1.7608e+02], + [-1.7608e+02, 1.8189e+02]], + + [[ 4.7813e+02, -4.4319e+02], + [-4.4319e+02, 4.4058e+02]], + + [[ 2.2209e+01, 9.6529e+00], + [ 9.6529e+00, 2.1218e+01]], + + [[ 1.2617e+02, -1.1445e+02], + [-1.1445e+02, 1.1682e+02]], + + [[ 1.4347e+02, -1.2252e+02], + [-1.2252e+02, 1.3385e+02]], + + [[ 2.6554e+01, -8.2178e-01], + [-8.2179e-01, 2.7081e+01]], + + [[ 5.4028e+01, -4.6349e+01], + [-4.6349e+01, 5.2516e+01]], + + [[ 8.2694e+01, -7.2855e+01], + [-7.2855e+01, 7.7691e+01]], + + [[ 3.5121e+01, -2.2341e+01], + [-2.2341e+01, 3.3967e+01]], + + [[ 1.7288e+01, 1.2687e+01], + [ 1.2687e+01, 1.7018e+01]], + + [[ 1.3049e+02, -1.1152e+02], + [-1.1152e+02, 1.2103e+02]], + + [[ 6.1717e+01, -4.0884e+01], + [-4.0884e+01, 5.7092e+01]], + + [[ 1.5962e+02, -1.4587e+02], + [-1.4587e+02, 1.4757e+02]], + + [[ 7.1002e+02, -6.9214e+02], + [-6.9214e+02, 7.1304e+02]], + + [[ 5.2373e+01, -3.0203e+01], + [-3.0203e+01, 4.9030e+01]], + + [[ 2.0410e+01, 1.2395e+01], + [ 1.2395e+01, 2.0171e+01]], + + [[ 1.7494e+01, 1.1637e+01], + [ 1.1637e+01, 1.7691e+01]], + + [[ 2.2520e+01, 7.8180e+00], + [ 7.8180e+00, 2.1914e+01]], + + [[ 2.7033e+02, -2.5441e+02], + [-2.5441e+02, 2.5378e+02]], + + [[ 2.0462e+01, 6.9179e+00], + [ 6.9179e+00, 2.0070e+01]], + + [[ 3.2784e+01, -5.7668e+00], + [-5.7668e+00, 3.2008e+01]], + + [[ 4.7022e+01, -2.1662e+01], + [-2.1662e+01, 4.5399e+01]], + + [[ 6.2850e+02, -5.9966e+02], + [-5.9966e+02, 5.9768e+02]], + + [[ 2.4092e+01, 3.0384e+00], + [ 3.0384e+00, 2.5205e+01]], + + [[ 1.0340e+02, -8.8599e+01], + [-8.8599e+01, 9.0840e+01]], + + [[ 3.1094e+02, -2.8153e+02], + [-2.8153e+02, 2.9783e+02]], + + [[ 4.1418e+02, -3.8716e+02], + [-3.8716e+02, 3.9161e+02]], + + [[ 9.7178e+02, -9.0696e+02], + [-9.0697e+02, 8.6491e+02]], + + [[ 2.3515e+01, 1.0905e+00], + [ 1.0905e+00, 2.2650e+01]], + + [[ 1.1334e+02, -9.4657e+01], + [-9.4657e+01, 1.1131e+02]], + + [[ 1.7486e+01, 1.2315e+01], + [ 1.2315e+01, 1.7293e+01]], + + [[ 2.0787e+01, 1.1168e+00], + [ 1.1168e+00, 2.0019e+01]], + + [[ 2.0311e+01, 4.0917e+00], + [ 4.0917e+00, 1.9311e+01]], + + [[ 1.8545e+01, 1.0817e+01], + [ 1.0817e+01, 1.8289e+01]], + + [[ 6.2943e+01, -5.0940e+01], + [-5.0940e+01, 5.8799e+01]], + + [[ 7.4593e+01, -6.0748e+01], + [-6.0748e+01, 6.7870e+01]], + + [[ 2.0733e+01, -4.6349e-01], + [-4.6350e-01, 1.9842e+01]], + + [[ 1.9035e+01, 4.3988e+00], + [ 4.3988e+00, 1.9810e+01]], + + [[ 8.0635e+01, -6.2475e+01], + [-6.2475e+01, 7.3018e+01]], + + [[ 8.8960e+01, -7.2666e+01], + [-7.2666e+01, 8.0607e+01]], + + [[ 2.1183e+01, 8.3794e+00], + [ 8.3794e+00, 2.1655e+01]], + + [[ 2.4911e+01, 3.6821e+00], + [ 3.6821e+00, 2.3469e+01]], + + [[ 9.8942e+01, -8.1679e+01], + [-8.1679e+01, 9.1068e+01]], + + [[ 2.0094e+02, -1.8876e+02], + [-1.8876e+02, 1.9544e+02]], + + [[ 5.8267e+01, -3.0670e+01], + [-3.0670e+01, 5.6574e+01]], + + [[ 7.5800e+01, -5.2832e+01], + [-5.2832e+01, 7.2862e+01]], + + [[ 2.5593e+02, -2.3475e+02], + [-2.3475e+02, 2.4045e+02]], + + [[ 1.6505e+02, -1.4341e+02], + [-1.4341e+02, 1.5294e+02]], + + [[ 3.0017e+01, -7.5571e+00], + [-7.5571e+00, 2.8914e+01]], + + [[ 1.9284e+01, 9.4893e+00], + [ 9.4893e+00, 1.8898e+01]], + + [[ 1.6753e+01, 9.4572e+00], + [ 9.4572e+00, 1.6396e+01]], + + [[ 1.2912e+02, -1.1510e+02], + [-1.1510e+02, 1.1708e+02]]], device='cuda:0') +06/01/2024 12:46:48 - INFO - __main__ - ***** Completed training ***** diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..24059993caf8f94e2d0648edd1620dce7d8f8c95 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3e4e79fee1c41b4a19f82331f0ba2df534a50f00c5878eabb0b3858a957830 +size 38 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..40099186d673a6c20386d7c8c2c341c0eb0d383e --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a943d7a47bac73b5c7c209d5129d1b23571b9e55ca0682f37b891171e295ce75 +size 11877 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..172518eb14cd554c36b6a67a79c8dd5b23737bb7 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_0/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57b1773ae54c754f48dec707782db7c51cc750c3e781b9d34f5affc8ab2c339 +size 6092 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..d04cf7db5e004e5d0f5b02c8432d31aa58eb1185 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400d8bf9845c12f5e1aa6f68a4dfcd2f15df00322fed148d524a3dea53e1d67e +size 38 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..251dd9bb50b4d3d3acb458377eb7b67febf2861b --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11736ed054bfb3085cdf9f85102bfbacd211e791ea4379f882af1af105927836 +size 11881 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..ec836a5faf2d5f3fda64af81358b694984f96a3f --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_1999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7a9e359d4ea9dcd201925062948cdb8d198a3233da0d5bd5956b24521c6206 +size 6121 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..6082eb359f52efe8dcf0a1f27004cfbf7e19ff35 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae87f4d3fd0f351faa101142b40b24809b8848e7d2340b7e5949d409b59039f0 +size 38 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..e232ad0abbcb70c7d15d2a1195628dea09d7e68c --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b56b2ede2b5a6006e2b8558ca7c454e3f1bf8085e8344aea4e87598d587b2c7 +size 11903 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..d4a41afbf0f84aba87ba2a3c1726cba374ab55fd --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_3999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500219995c93252eaed295b95db9a01fcebfb64a405b370d289a90112590fa99 +size 6142 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..d04cf7db5e004e5d0f5b02c8432d31aa58eb1185 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400d8bf9845c12f5e1aa6f68a4dfcd2f15df00322fed148d524a3dea53e1d67e +size 38 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..ffeed17bacb4d058d0779326a54f15e0d21879e9 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da882f87a594e34645540678d6b1261537b7133f90818044fa92b680cf111ac8 +size 11950 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..9923c1bd4cdd4e93250a56372ec0a1e7c5f529e7 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_5999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e325b891b429f7851a95f1c0ae5ad37f8cf13ffc7f9358b4370fd41c860c3514 +size 6153 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..6082eb359f52efe8dcf0a1f27004cfbf7e19ff35 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae87f4d3fd0f351faa101142b40b24809b8848e7d2340b7e5949d409b59039f0 +size 38 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..b81d47e4670b130af6e9423a949d39901cc71213 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ca8a5c35ff8258ed5ff25a064c9dfa4c7d68806224de88409737048ca5830c +size 11991 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..ab3d88de9cc7d5deac55bf683d9fc289053c1135 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_7999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e60fe0000943c1b3889aa28d4a3423716d5c19b463cdb25539d79396663f31 +size 6149 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..2e8279dcf64ce3b546eb6b794f0354ac2d72c54d --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff48406a92f36cdb9b71e56b08de43095ecf800843b3564f92a1f0eef5b9c13 +size 38 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json new file mode 100644 index 0000000000000000000000000000000000000000..ca518c8525ef07f520829eee41ab953db3517ca8 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d190830116335eba879e1bd7984f0f1e1d2cc5020d48d5556fd7c4d91d695f7c +size 11958 diff --git a/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json new file mode 100644 index 0000000000000000000000000000000000000000..b3a34332eb10c1899e773bbfed89d02949233f99 --- /dev/null +++ b/outputs/wnli/roberta-base_adapterstrain_val_0.0001_65_8_10000/step_9999/gpu_stats_la.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5081ec60f3804f7b3188468141c88e37e2881c375dc3cfc85bd72f31fd62add2 +size 6153 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b30064d796667271df48911d577bf19d1c79a366 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0a7bcea357949ecf364b0d55411c74ee1ee5646ca08805d3d4253374d186b8 +size 9644 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5917e31eacebbab6d7565c235f524fd215fabef --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5794122e22474d62b3e25445e7eacdc2ed8216c760d26e715418245b6dd300 +size 18033 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a2887c120b0c2ca3441e011e591d4670e8007a9 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a366d71774a8c9d5938ab6cc59b782b385d1c745fd5660970f18d389f90ccc2 +size 1379 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..73d8085e4bbf8adf9e1a8030a05f60c3d61d4419 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7c8dd345c52e50694df1eedffe909fffa77eaf1ecdf28980932a3966d73122 +size 9644 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..155e5ab44fcafea6e945cd4e710ccc516567d1ad --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a169e8c3297bc86741b2deb68be248a4d367eb10e1fa78bafad76a909745628c +size 18033 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e24b2f1184e45f94df4f8849d4f20b3c1ecf119d --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c904168095232ba0481af92fb41f09edba5c28ff1620617d219f31a4d08323 +size 1379 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..26e835e1cd460f0af1a22458e5f45bec096b719d --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aedf772ca9d78880637035d66d42ab05497ef3f7902d7c3240a4bde70713a8f3 +size 9644 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..3614ca229f28f384c6aeb358ffa3cc4b2f1d3374 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363448c4199155c5440667325b3ab458368269328609d4f99d2d678b4b8d8dc9 +size 18033 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c6f95b5892a22630372ec37bd736a0729dace0a --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7d86400055ddf7927fde7feaa52eebf59e45ca3e629c3c84169dd90e4ed834 +size 1379 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..49c31d3b5d48b52d82fa1dba37bc28d834f40acb --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517fe09915ac3ce4d2bc7e198ae065357fb5ac7bc25ea99b2f7c77e02f07c4c5 +size 9644 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae5b2e43a706506fbbbe898a2a8cfb1a00d637bb --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8599d3c84a0c9e88fa348d0840ccf62e8361bf4df0556d08c6417e8583858b42 +size 18033 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7580cb5c18875261df81fb6cfce977dc10f8247 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad722e683d6081f049f078c3eb272eb5dbe9eda5fb82b4e0aa20597aef5eb24d +size 1379 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..30892d8cc5bc81bfe38b609f754d5518e5eca004 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9da3f069cd375889c01fec68dd661cbfd78f0d6cd703f72e305d73adbfb964d +size 9644 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..06fda9ff475fc0a75cee55e5fdce4392eb7522cb --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1781344298e6208e79be5a188cd4631d52d195106b5a7b177dd85085504e69f7 +size 18033 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b360ed5f25ad2b0559b90db872c3d0fc68d67ce1 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be733e4257b1e03d96dee47265f56de859791271bc75e87e45cae8e71bb1f58 +size 1379 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d89c33c45144082e0cb82ce69ef74d75da75874d --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf2a81caef99c05db2c17c5af911bc6fca4030860fc271fb1774cbf11ec32d4 +size 9644 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4d0770e08b4fbde8658e8609dc5d50a3d37f5b4 --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aed8d128f5757e15f6a43faf95045f53a7c177ad4b5a157546501635a546719 +size 18033 diff --git a/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0b8567328405f5d83e837e293307836e10b9abe --- /dev/null +++ b/outputs_laplace/cola/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40d5285ef004cbcc2ab6cc41fd9975d603260c0fe8159c24d87a86f748057203 +size 1379 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3757efaca3feb121a029c8694cd7eada982a704 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61a189313f6fa7050bcf27dc8e88151c3edbe6663f65e9ad1db5167e9d8d1c9 +size 4588 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7821289e93f4b1e278ba039a6b0ee7c220a9c09 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d5d98e3c5599a9259833dccfdb91508382d63c73acadc4e4009a1f75c183c8 +size 7857 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..40a1ff3bdfc04f8b66c47df97c0382b2b5a19577 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4211d14c7bbd73932209047e59cae42c4714c4569686c76021a9ef2a3866b696 +size 1379 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c1ac8aa725e5ae56bbbcce2a3bb619d2795d559 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3a23409b1598325b76c0ecb668c26461805ef5ebad98ba2fd49b018edb8c0f +size 4588 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe643eef888922629aeca6ca50a1e4b06a2d5f07 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2572f55f48098e155af354c65dfaa5c636fe714add9688e45d3db9ee74af21 +size 7857 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..49da97a0bb1305d5101233a38c5d8686f08c8cef --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ae454fb03640e6c9e3d75b05501e51dbad5720e6c6e152f1097edd4993ba69 +size 1379 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..47b99314bdc420d6d553b8c3d18f03075d5f092a --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a68fe0954cc94f7e234a27dfe64bc504bf7ceeb49ef9888634390704b16094 +size 4588 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e27c30e1659d48c6fe8ebf4df636ba15d97869eb --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16927e887af7da7db99b2b9aca5941ceb6aa6f72ca1c45643935f7c9e98e2a2 +size 7857 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e239865b6e27ca245586285c02cea8bf2428f87 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f202aa44919a6dacaf42ebd5864febb5c3f4356aa0de004620bfdfec1100bc1a +size 1379 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..43533473992ac634615b702ad122e10f936405b8 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a6d47a42210948a68176987caa65964d3f22051124de94f3bd30c9df2b8f04 +size 4588 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..10dc34e3a8c1b914716dbb0fdac1dc9a133c1a2f --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591456e56a774aced6620818ce4be419c2c8a17e4a613bd9cc80874a53d5504f +size 7857 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..74e910b1c99c317da787d9f734b72a226d9e1919 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16f9b930ebf9e50c6b00814eb7bf06cbd72ac7a5f08a7df0a44203cc5264ec1c +size 1379 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..614529b08729aa0fb50a096c755d382506c086b2 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c87e24553741860426aa154dc8866e0d183b39787f933ef7834e6f97a28e550 +size 4588 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..2590f50ce7ee8f2513e9f356020512c60a036b82 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d22d10d248f91e5334d7184c74c8acb3fea2ca04b6c70f49b46cb295e38dc2ae +size 7857 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..46467ad0c3b37f818f4bb41929f9903ca0bd3074 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33ab557e833f8f27b23eb2ccd13f52527e8e56406a860f5c0484be6ba65f866 +size 1379 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bdd54e204b0cf49a5b0bb5eb1d469db6b5ce2e2 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0db94e2d690dd21b8a9b8766718f2b4a02cadc358041c589bb7c5a9a1fbab3 +size 4588 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..c89e7437acc803908a46bc072ee307a10a39d7ed --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f20e678401af85f9ffc51e0092d251ed16f4a7c60d300bd90c559b1cc98d0c5 +size 7857 diff --git a/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..774d93b5b08b9670a8a2a2f2920c5f58eb981997 --- /dev/null +++ b/outputs_laplace/mrpc/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1508196963933b60ae574bd6294957f9d33f1048cd416dae6040d6bff135b0c +size 1379 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..af4b3f16c8d4a7a385cbb2580abd612ab5fbb2d0 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3017003df2b95eb0fb27c3e313527d5c4b563a3a58ce93cc730ff9b42dc5f7be +size 4588 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dd72ee376e94d401d5bfc2572d3529bccc83160 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de10f157767f7ac544567582ea1496627432156700561bcfc47b502502cea8b0 +size 7857 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..177c3703c9a497b67ff948e412ea26b7c957ddfc --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69081a6b4dfaf8aa262f02d7c3851765d5e697a4f70615e01731f301687c2e23 +size 1379 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd92592170c0c6ea6058a3d706ad3f737611c7d1 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8aa4829d639bda136eafe6a8bfb5e5c0f37ee9a0a697c36d4d8ec9fb123633b +size 4588 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e70254666ca87fd64f5650de6f3506eb405210f2 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712052b29ed39eab8634a93253f6e31ddb1dc27bb9b24a448b11a5b02f9a7364 +size 7857 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..58dd6322455fbe784d3e1fea41813158182bd284 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50102c681d4d6c1f32e2d00b830d7a2c1e0e06dc4718214f98ce9e3c235f8ec8 +size 1379 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..79fb6242d24508d50867f15327e17b37ec12aa36 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461730318e7de05a416008ba12e11ccd905f6c03f03114cf5d67d590e59c9654 +size 4588 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a2abfab41fbda60224004f57076f84cd0f996ee --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c214a509186b2cd2dae83ad2b82d42bc7d912baef39652996c9ee8534ebfe967 +size 7857 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..91eea6c24dcefe6ab2a00abc6d58436f09761ec3 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd8101ab1240e1e6580959c073c5bd8dc76d0ed3878c240f4b48bc331374537 +size 1379 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cf9ee237a2ff5460eb35feae07eacf820cd846d --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e805b10e49374176bfa5638f70f32d9266e47c2a2d64d82b59184944ef045c2d +size 4588 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..46e524276334a81f4f61b188f6745f89871de0e8 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56597f1d85239a07fbaab19e9ac6a5b222b6f1eb74655799dcb01602f92a4051 +size 7857 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e95e99068783eeba84ed2bf6bc2ab306b25b596e --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b8128a4bfb86e623ebbeec388cbf4a4a5a184929696c6181383a7869b358e9 +size 1379 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..65485b62cb94e771e9f7b716086dbc11d5d4c0ca --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a202769e712024e4bae9ed99b4f0b63d01cf3342d7617c31f451c2844ab89816 +size 4588 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5dba4468596b07f85b251f54c6419ba48b188c3 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4853b886f3965f9f6b178c7631a07125a5667cc36bdd306d40304a6b909f01d +size 7857 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2e13aa62ef522b3728617d2923d0127163e6f48 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3140aad32ab703eb605205f5f67bf320ffe6b93ec54a86dfc9eb53cf462b48dd +size 1379 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d55785e6caf2d0f30cbb3f0f43a86366d96f39a --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31012c04c5403f0835e7d469849c8676479068821a77a5df607f1ecc637e4db8 +size 4588 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ba180bd8768ecec3da42bb5f1d54c1abb6136b2 --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3061fe7f264060fe2d7edf7113a103b6d7b145059ace9ff1ec2bbaea239099ba +size 7857 diff --git a/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d3a322f1c4231cb49f6100efab5ea05bcf6a0aa --- /dev/null +++ b/outputs_laplace/mrpc/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b5a0fce86ff359f537e7a2d3d8912566b0949e2883446e923dac2457e9786b9 +size 1379 diff --git a/outputs_laplace/rte/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/rte/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..743202ee7b531ab9c80d411f071b7f6760a7210b --- /dev/null +++ b/outputs_laplace/rte/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a1b5389882ca30b662d7a79c69a6d01958aac7e2501cb59db2385f3b69e35e9 +size 1379 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..eed972ee7a813b3e124df6a30ab6e3f458c7f1c9 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:370b54497f5074eb1d5f33c39e9e9ef501b156bbd555f288ff2fbbf4ac27a05e +size 8300 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..12df8ed9f70a88de97fbf24a023792e971a01076 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec262a85faa1e81efcc0e9980137d964c4d62e307036155f10159580a6086fd +size 15281 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f213c98ff5bf925277ab5465ec72dc288d48c01 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3667c176d09cddae202ff683e6e40d83f17a7bb986bfacc18d95d36ac059b74 +size 1379 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd1da9974ec10ccad0c1efc83e227cf062697fc5 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04230dd28db32401362cb9ab81815b9c6d2a0db3855c1bb4fe6fae3c482ce7b7 +size 8300 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..23bcb95bee69486c9d22812abd98fb53e5e8aeaf --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0981a8fc54d7f19082c1db7e3e2d8264e503c0a4e538f747e16307c929ba773 +size 15281 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..db29b5825a61985d7aac9cc65191e7ae1dedef5a --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed94143909e079483fbeb8ee1bdd195f7daa576d5c7021d93e34f39bb1fd13b +size 1379 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0f94909fd243eac70cc7265edac0b21ca13d124 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c4c98fd22ce68fdac7ae5e5f4dfaddd2e3a5179279c82c0ee028f21a2d126d +size 8300 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..354235334ed3e71d2e9328df189d05f3c2aaea0e --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74c5c1f69123bbf0b2485a915cbc004009b9fe0b44048440ed05c6f538314ce +size 15281 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..196b8c584117b693571ea625cabe6e5063a274e1 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a313fc3fe2548393e41731f9b5f4d887226d0687f40255c6649c8843abdbf72 +size 1379 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a9fcb6534ced3666fe9b5b5f02b87a9bdb22366 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17687273b27e53d6742fa83dfabf5e320bd4136be8291bf25e0bf27ebe80f17 +size 8300 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..25a07d1c803e56aa450d0bf1997b5b3a4f49c145 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd3bd69b66526efff7d23c1c28a7c45abd296e4b41ac1892a05e378fa8159aae +size 15281 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f81ee9c3217f5130da7c4d7948ea67006bea3b27 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be68dba1774227ea888c10f32953f07e81ec338c2a16dc6db22b1427890fc173 +size 1379 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..932491088d70def9ec2917a5cf7bf12f0832612c --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee59bdfbd264b93de92e95bf1fa1e439fae8adc1467cdfe7ea87294a93a4a21d +size 8300 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9e0a295e35e1213e1bf40a43ab6b1afeb711eef --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a9f40df03e39919acb78ad7cb6bf926d68bdae8d34e5805feb6b0de5053eff +size 15281 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d75b1478dc212f74b152c77ee078a68d380460c4 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b74338e41c328ea1ae6a1fd0fff8aa5170bcd48124d0e0311f1f2e047fc06a +size 1379 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..47e3f80742ee9fa099fdf283020df00e6acc7a66 --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cd4ca77b95e89208a553e65e20653025abcc3387ae53c13c8201727a12d6fd +size 8300 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..373773fb92112685f301b4bdc9358298d462709a --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd43daf26769fcd91606c8554ddf9abad5bc2a5b8861168c0fe29e9233c52d87 +size 15281 diff --git a/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d614aa63f80abbcd689f707fd80a745f6d4b1bfe --- /dev/null +++ b/outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea9a09498fa5927a74567a8c6c37ab85bbbd1f806f961b98fbc155a1f4d4b62 +size 1379 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..02a28be42240f849f5c96e5e3fafcaea60c4fa4a --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5410f8e61a673ec4b4d6f458f14fd543381c0811f5d8929b0d6b47149cf9075 +size 1900 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cee103bf16c06dc45273ba3be5a3b5afdba8dc8 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338f123f19331ee19e75ccf1541dd024b51eee10d8350c2515d66f6e749abbae +size 2481 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6992faa9133f7f4f669adc0ea7f65572155ec7cd --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066a4d36ebb0ac9e90134b7987b9376974aaee6872b52f50387b061a53a2f77a +size 1379 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c91dff6943599a5d9f3c68d37b425a758b3c02c --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398238ce8bad819876386603054593827e5c8a11f6b6bef8b58acc083c4354b9 +size 1900 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f7d4397373b51dd81b2ae4fb16a7ecf7acc8635 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e91c2a1314f300eea7dbc3b6470698e9352319b441a6d7caca971617ead03e +size 2481 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..422e6252a54396fe33f82b5dbd84814b337be4ba --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abf66b66725eec6dca4ff1c80da7d2d9956328413f11035d0965171d318bcb1 +size 1379 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..68a45e6c78d9d4790a735d6bcd8eaddd05b71c27 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1e79317a02749a79dc9a375545ee8f95a7679a500f38839155165f924e8ba2 +size 1900 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..882c8e8407db6dd5becfcee3cd0cf8fe6ac9a071 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ed57376e279f3dacc7f94fe5bf3c565a94e77753bc72ffb2edc5066ec4e336 +size 2481 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f6aadea2dda3b988267e40f467a8769a0fd1f80 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aebc5c351c9ceb4bd59b5578252d018fe7964420d17ab763f48856976975f40 +size 1379 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b90c728f977bc02de3f5e45f5481cd22536c25c --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247452b2c41c03a3083c1314ff9f2b4ac415958a84938f748b9e3bc7e539b40d +size 1900 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e38f169e1d37a75a9065808e50d4471f00cd0b78 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f7e650fd29690ce373c9cc172fa4d6eec8dd06fbbd3cef9bd153a50b75c144 +size 2481 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..1093c90ae78636a36fb2bbe9d36e92e1a3d90d10 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5277737eaf3be287750037aafd530d48b090ec14fe049c37cc29a057e9a3388 +size 1379 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..cae1d244db345ed50084cf612ced628140470d57 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791bb8ab37fc424c8590a177ffe1c073eaff4aa092b218a6ed0850477df2b6aa +size 1900 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc90ee9820025c79f11eb65a0ab0f913c6ebaaa4 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b41b5740dd851fae1426bce0e940452b6db85f4d44fcf644a2448a4eed721a5 +size 2481 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..8768c9cb141c2fd9c55c76e7e66689a490a19d1c --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef1d34ee7e355fddf71839e59795d3981680708b35b8b97231aadb5e8fbb4a3 +size 1379 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6611eac73ca1ac11ab34da23633126780bf37180 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d18726aadedf4765bf70065e0df431530fee8675b2d27182494828671d86394 +size 1900 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff5d5c04cd398c26307bda2b335539168dbe2c86 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c6bdeb81d85150fbede07af29d06893d0be85119e2fa43763ca6434541f1836 +size 2481 diff --git a/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a296ca0d3ca68b4a60a2c868a6f29237b303ddb1 --- /dev/null +++ b/outputs_laplace/wnli/bert-base-uncased_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d10bf3f646ae7de0a95e48b22caba00871850265870e71f9cb25fd70802ae0 +size 1379 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..85508df75bc127412ea8f72cc28f38f0cf0ad865 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15370dd0839ae6c150d451e41b2c28d4ed8c63ad1bec33fb9b3702ee00ddced9 +size 1900 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..21b393ba4a16d19b58f744b54e66d6192220c3ce --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ade2db52005ecbb0c9e71e87005b10f44243abfdb1be376b28ade66562561f1 +size 2481 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3ad88ff2d060bfcf33f40f44b7edd06afaa2276 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_0/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b71d958ab30179321ef89ca77be9c1713d2039c673559681d6c79351ad5cda +size 1379 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..585b257316ceb8ba3eb5425a1cb79d1133f1d01e --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f2d3e39c777d897e2ef3874e353f3821f9ae5c578f8fc43b0db0f9d8b5b683 +size 1900 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ca03099395165a255d4cfe59d576bd9fa6e3851 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21eb716deef38b9b32cdbc6e9ecbb01a0ddcc5c57663b4448a3a97436a9452b4 +size 2481 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..603b71a52ad47649902de9af0b6595211a579dac --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_1999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88acf7005604a16e3b9f2fe8f0944f9969dd9180d5b2bb1a1117e114b2a1674 +size 1379 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee4210be3fccff5ae9bce0137d78ede4900d2e60 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5135c217da9565acd8bc251048f9affddc00c8371b4dc4ce6ef06ad579694aee +size 1900 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9043fad6fb7f287d8c9db3dc68455b58b5c9c6f --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256b1c32b7a036a5b8409012b77451750d9d0ba615f81ef445e54451a64a65c8 +size 2481 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b5635ef5cc7f1a62185f8546858e4a7cc5dc360 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_3999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126768462bc754275bd064302a1e0f3105a09be91d6bcd60f0b5b664760f52d9 +size 1379 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d60601cef23b84b2e7ead0ea976048be5ddcf0a --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4522561f4bc49a73f0b2ed52ecc4d1f4bfdc7582aa124057df13f15f8f8eb001 +size 1900 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bd424b6e22f09effe18219d5f5e9fab2ca33309 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b651ce69aea2a3b4349d80573167ec6ee44d975ff2d839128ea3644ddec4c0 +size 2481 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..702efb8cb66703a46cd15a6b6cc62c910824ba26 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_5999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df31045c0d605c7064c9b6a87ee04a6ccaa6499037e1e3e4641de6b8c4e80aa7 +size 1379 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..266d3463fca4c578e15fffbe31b5c1e5a0b6bf08 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc61ccab20e6c1ee89d5275f0a451c847e9b0c7e050df630b2ba419f27fb46d7 +size 1900 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..20c16e6082f15f29f8fd51b02edc69a424d0ecca --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df63ce4a07a451e1a8c7d2596b5677885044625cd6715c4df1d57b1980f54aad +size 2481 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..799912b28fd532e0f693fbf90e00b84ca39934c7 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_7999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae4cf328b5d86d85ff3621eeb17f51373c0e99c5f9d0ad5c4e66461a3354366 +size 1379 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fada0aadb53609a529f5bdd73d842f2abe7ac1e --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_mu_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15a585c80348a1551c30eafa6540321669816aefa4273f35bbb32493fa80512 +size 1900 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..3391916b590dd30e5a6328dd169e7867d936d8e0 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/f_var_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23b6eaf71a0521b8da23eff531e477555cdf10cd591442c2945c4f501926949 +size 2481 diff --git a/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb519c2371fbbbcd17a6c618cdd95cca36718392 --- /dev/null +++ b/outputs_laplace/wnli/roberta-base_adapterstrain_val_0.0001_65/step_9999/prior_precision_kron_all_homo_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cacd1f65dc7c5459ca24f238f53f0274ee4f61b93412e7d6427967604e655d9 +size 1379