jdorairaj committed on Jun 4, 2024

Commit

68fd600

1 Parent(s): cd5fbbd

32,64, batch size

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

outputs/cola/args.json +41 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/args.json +35 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/cola_bert-base-uncased_train_loss.png +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/cola_bert-base-uncased_validation_loss.png +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/logfile.log +122 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/logfile_la_all.log +30 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/README.md +202 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/adapter_config.json +32 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/adapter_model.safetensors +3 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/all_results.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/all_results_val.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/eval_res.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/gpu_stats.json +130 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/gpu_stats_la.json +130 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/special_tokens_map.json +7 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/tokenizer.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/tokenizer_config.json +56 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/val_res.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/vocab.txt +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/README.md +202 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/adapter_config.json +32 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/adapter_model.safetensors +3 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/all_results.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/all_results_val.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/eval_res.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/gpu_stats.json +130 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/gpu_stats_la.json +130 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/special_tokens_map.json +7 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/tokenizer.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/tokenizer_config.json +56 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/val_res.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/vocab.txt +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/README.md +202 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/adapter_config.json +32 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/adapter_model.safetensors +3 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/all_results.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/all_results_val.json +1 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/eval_res.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/gpu_stats.json +130 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/special_tokens_map.json +7 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/tokenizer.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/tokenizer_config.json +56 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/val_res.json +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/vocab.txt +0 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_3999/README.md +202 -0
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_3999/adapter_config.json +32 -0

outputs/cola/args.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "task_name": "cola",
+    "train_file": null,
+    "validation_file": null,
+    "max_length": 300,
+    "pad_to_max_length": false,
+    "model_name_or_path": "bert-base-uncased",
+    "use_slow_tokenizer": false,
+    "per_device_train_batch_size": 64,
+    "per_device_eval_batch_size": 64,
+    "learning_rate": 5e-05,
+    "weight_decay": 0.0,
+    "num_train_epochs": 3,
+    "max_train_steps": 10000,
+    "peft_method": null,
+    "gradient_accumulation_steps": 1,
+    "lr_scheduler_type": "linear",
+    "num_warmup_steps": 0,
+    "output_dir": "./outputs",
+    "seed": 42,
+    "push_to_hub": false,
+    "hub_model_id": null,
+    "hub_token": null,
+    "checkpointing_steps": "1000",
+    "resume_from_checkpoint": null,
+    "with_tracking": false,
+    "report_to": "all",
+    "ignore_mismatched_sizes": true,
+    "save": false,
+    "load_step": 999,
+    "lora_r": 8,
+    "lora_alpha": 16,
+    "lora_dropout": 0.1,
+    "laplace_hessian": "kron",
+    "laplace_sub": "all",
+    "laplace_prior": "homo",
+    "laplace_optim_step": 1000,
+    "testing_set": "train_val",
+    "cache_dir": "/content/cache/huggingface/metrics/",
+    "laplace_predict": "mc_corr"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/args.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+    "task_name": "cola",
+    "train_file": null,
+    "validation_file": null,
+    "max_length": 300,
+    "pad_to_max_length": false,
+    "model_name_or_path": "bert-base-uncased",
+    "use_slow_tokenizer": false,
+    "per_device_train_batch_size": 64,
+    "per_device_eval_batch_size": 64,
+    "learning_rate": 5e-05,
+    "max_grad_norm": 0.5,
+    "weight_decay": 0.0,
+    "num_train_epochs": 3,
+    "max_train_steps": 10000,
+    "gradient_accumulation_steps": 1,
+    "lr_scheduler_type": "linear",
+    "num_warmup_steps": 0,
+    "output_dir": "./outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000",
+    "seed": 42,
+    "push_to_hub": false,
+    "hub_model_id": null,
+    "hub_token": null,
+    "checkpointing_steps": "1000",
+    "resume_from_checkpoint": null,
+    "with_tracking": false,
+    "report_to": "all",
+    "ignore_mismatched_sizes": true,
+    "save_train_results": false,
+    "lora_r": 8,
+    "lora_alpha": 16,
+    "lora_dropout": 0.1,
+    "testing_set": "train_val",
+    "lm_head": false
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/cola_bert-base-uncased_train_loss.png ADDED Viewed

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/cola_bert-base-uncased_validation_loss.png ADDED Viewed

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/logfile.log ADDED Viewed

	@@ -0,0 +1,122 @@

+06/04/2024 18:58:24 - INFO - __main__ -  Number of labels detected = 2
+06/04/2024 18:58:25 - INFO - __main__ - None
+06/04/2024 18:58:26 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 18:58:26 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 18:58:26 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 18:58:26 - INFO - __main__ -  Max training steps before recalculation = 10000
+06/04/2024 18:58:26 - INFO - __main__ -  num_update_steps_per_epoch initial = 107
+06/04/2024 18:58:26 - INFO - __main__ -  num training epochs initial = 3
+06/04/2024 18:58:26 - INFO - __main__ - Adjusted num_train_epochs based on max_train_steps: 3
+06/04/2024 18:58:26 - INFO - __main__ - PeftModelForSequenceClassification(
+  (base_model): LoraModel(
+    (model): BertForSequenceClassification(
+      (bert): BertModel(
+        (embeddings): BertEmbeddings(
+          (word_embeddings): Embedding(30522, 768, padding_idx=0)
+          (position_embeddings): Embedding(512, 768)
+          (token_type_embeddings): Embedding(2, 768)
+          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (encoder): BertEncoder(
+          (layer): ModuleList(
+            (0-11): 12 x BertLayer(
+              (attention): BertAttention(
+                (self): BertSdpaSelfAttention(
+                  (query): lora.Linear(
+                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
+                    (lora_dropout): ModuleDict(
+                      (default): Dropout(p=0.1, inplace=False)
+                    )
+                    (lora_A): ModuleDict(
+                      (default): Linear(in_features=768, out_features=8, bias=False)
+                    )
+                    (lora_B): ModuleDict(
+                      (default): Linear(in_features=8, out_features=768, bias=False)
+                    )
+                    (lora_embedding_A): ParameterDict()
+                    (lora_embedding_B): ParameterDict()
+                  )
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): lora.Linear(
+                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
+                    (lora_dropout): ModuleDict(
+                      (default): Dropout(p=0.1, inplace=False)
+                    )
+                    (lora_A): ModuleDict(
+                      (default): Linear(in_features=768, out_features=8, bias=False)
+                    )
+                    (lora_B): ModuleDict(
+                      (default): Linear(in_features=8, out_features=768, bias=False)
+                    )
+                    (lora_embedding_A): ParameterDict()
+                    (lora_embedding_B): ParameterDict()
+                  )
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+          )
+        )
+        (pooler): BertPooler(
+          (dense): Linear(in_features=768, out_features=768, bias=True)
+          (activation): Tanh()
+        )
+      )
+      (dropout): Dropout(p=0.1, inplace=False)
+      (classifier): ModulesToSaveWrapper(
+        (original_module): Linear(in_features=768, out_features=2, bias=True)
+        (modules_to_save): ModuleDict(
+          (default): Linear(in_features=768, out_features=2, bias=True)
+        )
+      )
+    )
+  )
+)
+06/04/2024 18:58:26 - INFO - __main__ -  num_update_steps_per_epoch before recalculation = 107
+06/04/2024 18:58:26 - INFO - __main__ -  num_update_steps_per_epoch after recalculation = 107
+06/04/2024 18:58:26 - INFO - __main__ -  num training epochs before recalculation = 94
+06/04/2024 18:58:28 - INFO - __main__ - ***** Running training *****
+06/04/2024 18:58:28 - INFO - __main__ -   Num examples = 6840
+06/04/2024 18:58:28 - INFO - __main__ -   Num Epochs = 94
+06/04/2024 18:58:28 - INFO - __main__ -   Instantaneous batch size per device = 64
+06/04/2024 18:58:28 - INFO - __main__ -   Total train batch size (w. parallel, distributed & accumulation) = 64
+06/04/2024 18:58:28 - INFO - __main__ -   Gradient Accumulation steps = 1
+06/04/2024 18:58:28 - INFO - __main__ -   Total optimization steps = 10000
+06/04/2024 18:58:30 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.0915684223547905}
+06/04/2024 18:58:32 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.1315580824298696}
+06/04/2024 19:01:39 - INFO - __main__ - epoch 9: {'matthews_correlation': 0.48363151084768286}
+06/04/2024 19:01:42 - INFO - __main__ - epoch 9: {'matthews_correlation': 0.48026532005810063}
+06/04/2024 19:04:48 - INFO - __main__ - epoch 18: {'matthews_correlation': 0.4747587637452304}
+06/04/2024 19:04:51 - INFO - __main__ - epoch 18: {'matthews_correlation': 0.49141635235201747}
+06/04/2024 19:07:57 - INFO - __main__ - epoch 28: {'matthews_correlation': 0.5099888407051765}
+06/04/2024 19:07:59 - INFO - __main__ - epoch 28: {'matthews_correlation': 0.49686152666715383}
+06/04/2024 19:11:06 - INFO - __main__ - epoch 37: {'matthews_correlation': 0.5286178863044644}
+06/04/2024 19:11:09 - INFO - __main__ - epoch 37: {'matthews_correlation': 0.5047475278422677}
+06/04/2024 19:14:15 - INFO - __main__ - epoch 46: {'matthews_correlation': 0.5243897017420636}
+06/04/2024 19:14:18 - INFO - __main__ - epoch 46: {'matthews_correlation': 0.5020901391068266}
+06/04/2024 19:17:24 - INFO - __main__ - epoch 56: {'matthews_correlation': 0.5410897632107913}
+06/04/2024 19:17:27 - INFO - __main__ - epoch 56: {'matthews_correlation': 0.5122402357220024}
+06/04/2024 19:20:32 - INFO - __main__ - epoch 65: {'matthews_correlation': 0.5265067723079826}
+06/04/2024 19:20:35 - INFO - __main__ - epoch 65: {'matthews_correlation': 0.5132824489782705}
+06/04/2024 19:23:41 - INFO - __main__ - epoch 74: {'matthews_correlation': 0.5213763355102656}
+06/04/2024 19:23:44 - INFO - __main__ - epoch 74: {'matthews_correlation': 0.514991763159774}
+06/04/2024 19:26:51 - INFO - __main__ - epoch 84: {'matthews_correlation': 0.5245973684146213}
+06/04/2024 19:26:53 - INFO - __main__ - epoch 84: {'matthews_correlation': 0.5248941625108541}
+06/04/2024 19:29:59 - INFO - __main__ - epoch 93: {'matthews_correlation': 0.5294768861655004}
+06/04/2024 19:30:02 - INFO - __main__ - epoch 93: {'matthews_correlation': 0.5252994742941989}
+06/04/2024 19:30:02 - INFO - __main__ - ***** Completed training *****

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/logfile_la_all.log ADDED Viewed

	@@ -0,0 +1,30 @@

+06/04/2024 19:30:43 - INFO - __main__ - ***** Starting script *****
+06/04/2024 19:30:46 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 19:30:46 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:30:46 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:31:49 - INFO - __main__ - ***** Completed Script *****
+06/04/2024 19:31:54 - INFO - __main__ - ***** Starting script *****
+06/04/2024 19:31:56 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 19:31:56 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:31:56 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:33:03 - INFO - __main__ - ***** Completed Script *****
+06/04/2024 19:33:08 - INFO - __main__ - ***** Starting script *****
+06/04/2024 19:33:11 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 19:33:11 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:33:11 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:34:14 - INFO - __main__ - ***** Completed Script *****
+06/04/2024 19:34:18 - INFO - __main__ - ***** Starting script *****
+06/04/2024 19:34:20 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 19:34:20 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:34:20 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:35:24 - INFO - __main__ - ***** Completed Script *****
+06/04/2024 19:35:28 - INFO - __main__ - ***** Starting script *****
+06/04/2024 19:35:30 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 19:35:30 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:35:30 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:36:35 - INFO - __main__ - ***** Completed Script *****
+06/04/2024 19:36:40 - INFO - __main__ - ***** Starting script *****
+06/04/2024 19:36:42 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/04/2024 19:36:42 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:36:42 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/04/2024 19:37:47 - INFO - __main__ - ***** Completed Script *****

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: bert-base-uncased
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bert-base-uncased",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "value",
+    "query"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68ad9f6c556fa0cefdee2d3f1c058573923e8b0afefb79923f14fc1d96825ae9
+size 1192672

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/all_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.0915684223547905}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.10206949543458196}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/all_results_val.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.1315580824298696}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/eval_res.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/gpu_stats.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+    "memory_allocated": 449368064,
+    "max_memory_allocated": 525788672,
+    "memory_reserved": 717225984,
+    "max_memory_reserved": 717225984,
+    "memory_stats": {
+        "active.all.allocated": 23350,
+        "active.all.current": 266,
+        "active.all.freed": 23084,
+        "active.all.peak": 280,
+        "active.large_pool.allocated": 9560,
+        "active.large_pool.current": 76,
+        "active.large_pool.freed": 9484,
+        "active.large_pool.peak": 83,
+        "active.small_pool.allocated": 13790,
+        "active.small_pool.current": 190,
+        "active.small_pool.freed": 13600,
+        "active.small_pool.peak": 200,
+        "active_bytes.all.allocated": 56919072768,
+        "active_bytes.all.current": 449368064,
+        "active_bytes.all.freed": 56469704704,
+        "active_bytes.all.peak": 525788672,
+        "active_bytes.large_pool.allocated": 53192540160,
+        "active_bytes.large_pool.current": 447610880,
+        "active_bytes.large_pool.freed": 52744929280,
+        "active_bytes.large_pool.peak": 523632640,
+        "active_bytes.small_pool.allocated": 3726532608,
+        "active_bytes.small_pool.current": 1757184,
+        "active_bytes.small_pool.freed": 3724775424,
+        "active_bytes.small_pool.peak": 7070208,
+        "allocated_bytes.all.allocated": 56919072768,
+        "allocated_bytes.all.current": 449368064,
+        "allocated_bytes.all.freed": 56469704704,
+        "allocated_bytes.all.peak": 525788672,
+        "allocated_bytes.large_pool.allocated": 53192540160,
+        "allocated_bytes.large_pool.current": 447610880,
+        "allocated_bytes.large_pool.freed": 52744929280,
+        "allocated_bytes.large_pool.peak": 523632640,
+        "allocated_bytes.small_pool.allocated": 3726532608,
+        "allocated_bytes.small_pool.current": 1757184,
+        "allocated_bytes.small_pool.freed": 3724775424,
+        "allocated_bytes.small_pool.peak": 7070208,
+        "allocation.all.allocated": 23350,
+        "allocation.all.current": 266,
+        "allocation.all.freed": 23084,
+        "allocation.all.peak": 280,
+        "allocation.large_pool.allocated": 9560,
+        "allocation.large_pool.current": 76,
+        "allocation.large_pool.freed": 9484,
+        "allocation.large_pool.peak": 83,
+        "allocation.small_pool.allocated": 13790,
+        "allocation.small_pool.current": 190,
+        "allocation.small_pool.freed": 13600,
+        "allocation.small_pool.peak": 200,
+        "inactive_split.all.allocated": 13585,
+        "inactive_split.all.current": 25,
+        "inactive_split.all.freed": 13560,
+        "inactive_split.all.peak": 33,
+        "inactive_split.large_pool.allocated": 6314,
+        "inactive_split.large_pool.current": 19,
+        "inactive_split.large_pool.freed": 6295,
+        "inactive_split.large_pool.peak": 26,
+        "inactive_split.small_pool.allocated": 7271,
+        "inactive_split.small_pool.current": 6,
+        "inactive_split.small_pool.freed": 7265,
+        "inactive_split.small_pool.peak": 12,
+        "inactive_split_bytes.all.allocated": 48796108288,
+        "inactive_split_bytes.all.current": 45559808,
+        "inactive_split_bytes.all.freed": 48750548480,
+        "inactive_split_bytes.all.peak": 93572096,
+        "inactive_split_bytes.large_pool.allocated": 44973031424,
+        "inactive_split_bytes.large_pool.current": 45219840,
+        "inactive_split_bytes.large_pool.freed": 44927811584,
+        "inactive_split_bytes.large_pool.peak": 93323264,
+        "inactive_split_bytes.small_pool.allocated": 3823076864,
+        "inactive_split_bytes.small_pool.current": 339968,
+        "inactive_split_bytes.small_pool.freed": 3822736896,
+        "inactive_split_bytes.small_pool.peak": 5402112,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 34,
+        "num_device_free": 0,
+        "num_ooms": 0,
+        "num_sync_all_streams": 0,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 56141194670,
+        "requested_bytes.all.current": 447707288,
+        "requested_bytes.all.freed": 55693487382,
+        "requested_bytes.all.peak": 523801880,
+        "requested_bytes.large_pool.allocated": 52417914880,
+        "requested_bytes.large_pool.current": 445954048,
+        "requested_bytes.large_pool.freed": 51971960832,
+        "requested_bytes.large_pool.peak": 521648128,
+        "requested_bytes.small_pool.allocated": 3723279790,
+        "requested_bytes.small_pool.current": 1753240,
+        "requested_bytes.small_pool.freed": 3721526550,
+        "requested_bytes.small_pool.peak": 7067140,
+        "reserved_bytes.all.allocated": 717225984,
+        "reserved_bytes.all.current": 717225984,
+        "reserved_bytes.all.freed": 0,
+        "reserved_bytes.all.peak": 717225984,
+        "reserved_bytes.large_pool.allocated": 706740224,
+        "reserved_bytes.large_pool.current": 706740224,
+        "reserved_bytes.large_pool.freed": 0,
+        "reserved_bytes.large_pool.peak": 706740224,
+        "reserved_bytes.small_pool.allocated": 10485760,
+        "reserved_bytes.small_pool.current": 10485760,
+        "reserved_bytes.small_pool.freed": 0,
+        "reserved_bytes.small_pool.peak": 10485760,
+        "segment.all.allocated": 34,
+        "segment.all.current": 34,
+        "segment.all.freed": 0,
+        "segment.all.peak": 34,
+        "segment.large_pool.allocated": 29,
+        "segment.large_pool.current": 29,
+        "segment.large_pool.freed": 0,
+        "segment.large_pool.peak": 29,
+        "segment.small_pool.allocated": 5,
+        "segment.small_pool.current": 5,
+        "segment.small_pool.freed": 0,
+        "segment.small_pool.peak": 5
+    }
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/gpu_stats_la.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+    "memory_allocated": 812003328,
+    "max_memory_allocated": 3935051264,
+    "memory_reserved": 10775166976,
+    "max_memory_reserved": 10775166976,
+    "memory_stats": {
+        "active.all.allocated": 625956,
+        "active.all.current": 735,
+        "active.all.freed": 625221,
+        "active.all.peak": 909,
+        "active.large_pool.allocated": 124456,
+        "active.large_pool.current": 222,
+        "active.large_pool.freed": 124234,
+        "active.large_pool.peak": 356,
+        "active.small_pool.allocated": 501500,
+        "active.small_pool.current": 513,
+        "active.small_pool.freed": 500987,
+        "active.small_pool.peak": 673,
+        "active_bytes.all.allocated": 803395350016,
+        "active_bytes.all.current": 812003328,
+        "active_bytes.all.freed": 802583346688,
+        "active_bytes.all.peak": 3935051264,
+        "active_bytes.large_pool.allocated": 784489274368,
+        "active_bytes.large_pool.current": 808820736,
+        "active_bytes.large_pool.freed": 783680453632,
+        "active_bytes.large_pool.peak": 3930087424,
+        "active_bytes.small_pool.allocated": 18906075648,
+        "active_bytes.small_pool.current": 3182592,
+        "active_bytes.small_pool.freed": 18902893056,
+        "active_bytes.small_pool.peak": 87769600,
+        "allocated_bytes.all.allocated": 803395350016,
+        "allocated_bytes.all.current": 812003328,
+        "allocated_bytes.all.freed": 802583346688,
+        "allocated_bytes.all.peak": 3935051264,
+        "allocated_bytes.large_pool.allocated": 784489274368,
+        "allocated_bytes.large_pool.current": 808820736,
+        "allocated_bytes.large_pool.freed": 783680453632,
+        "allocated_bytes.large_pool.peak": 3930087424,
+        "allocated_bytes.small_pool.allocated": 18906075648,
+        "allocated_bytes.small_pool.current": 3182592,
+        "allocated_bytes.small_pool.freed": 18902893056,
+        "allocated_bytes.small_pool.peak": 87769600,
+        "allocation.all.allocated": 625956,
+        "allocation.all.current": 735,
+        "allocation.all.freed": 625221,
+        "allocation.all.peak": 909,
+        "allocation.large_pool.allocated": 124456,
+        "allocation.large_pool.current": 222,
+        "allocation.large_pool.freed": 124234,
+        "allocation.large_pool.peak": 356,
+        "allocation.small_pool.allocated": 501500,
+        "allocation.small_pool.current": 513,
+        "allocation.small_pool.freed": 500987,
+        "allocation.small_pool.peak": 673,
+        "inactive_split.all.allocated": 270751,
+        "inactive_split.all.current": 99,
+        "inactive_split.all.freed": 270652,
+        "inactive_split.all.peak": 204,
+        "inactive_split.large_pool.allocated": 84601,
+        "inactive_split.large_pool.current": 69,
+        "inactive_split.large_pool.freed": 84532,
+        "inactive_split.large_pool.peak": 154,
+        "inactive_split.small_pool.allocated": 186150,
+        "inactive_split.small_pool.current": 30,
+        "inactive_split.small_pool.freed": 186120,
+        "inactive_split.small_pool.peak": 81,
+        "inactive_split_bytes.all.allocated": 676749936128,
+        "inactive_split_bytes.all.current": 404344832,
+        "inactive_split_bytes.all.freed": 676345591296,
+        "inactive_split_bytes.all.peak": 1178222080,
+        "inactive_split_bytes.large_pool.allocated": 657372517376,
+        "inactive_split_bytes.large_pool.current": 397041664,
+        "inactive_split_bytes.large_pool.freed": 656975475712,
+        "inactive_split_bytes.large_pool.peak": 1171644416,
+        "inactive_split_bytes.small_pool.allocated": 19377418752,
+        "inactive_split_bytes.small_pool.current": 7303168,
+        "inactive_split_bytes.small_pool.freed": 19370115584,
+        "inactive_split_bytes.small_pool.peak": 42167296,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 395,
+        "num_device_free": 78,
+        "num_ooms": 0,
+        "num_sync_all_streams": 1,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 795325824846,
+        "requested_bytes.all.current": 798508896,
+        "requested_bytes.all.freed": 794527315950,
+        "requested_bytes.all.peak": 3916922592,
+        "requested_bytes.large_pool.allocated": 776518211392,
+        "requested_bytes.large_pool.current": 795392000,
+        "requested_bytes.large_pool.freed": 775722819392,
+        "requested_bytes.large_pool.peak": 3912022016,
+        "requested_bytes.small_pool.allocated": 18807613454,
+        "requested_bytes.small_pool.current": 3116896,
+        "requested_bytes.small_pool.freed": 18804496558,
+        "requested_bytes.small_pool.peak": 87693896,
+        "reserved_bytes.all.allocated": 12910067712,
+        "reserved_bytes.all.current": 10775166976,
+        "reserved_bytes.all.freed": 2134900736,
+        "reserved_bytes.all.peak": 10775166976,
+        "reserved_bytes.large_pool.allocated": 12775849984,
+        "reserved_bytes.large_pool.current": 10651435008,
+        "reserved_bytes.large_pool.freed": 2124414976,
+        "reserved_bytes.large_pool.peak": 10651435008,
+        "reserved_bytes.small_pool.allocated": 134217728,
+        "reserved_bytes.small_pool.current": 123731968,
+        "reserved_bytes.small_pool.freed": 10485760,
+        "reserved_bytes.small_pool.peak": 123731968,
+        "segment.all.allocated": 395,
+        "segment.all.current": 317,
+        "segment.all.freed": 78,
+        "segment.all.peak": 317,
+        "segment.large_pool.allocated": 331,
+        "segment.large_pool.current": 258,
+        "segment.large_pool.freed": 73,
+        "segment.large_pool.peak": 258,
+        "segment.small_pool.allocated": 64,
+        "segment.small_pool.current": 59,
+        "segment.small_pool.freed": 5,
+        "segment.small_pool.peak": 59
+    }
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "padding_side": "left",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/val_res.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_0/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: bert-base-uncased
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bert-base-uncased",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "value",
+    "query"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e7fd2a9ba601173e37c660a4325b2b74f7007618b4a250d080984b8b7397df3
+size 1192672

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/all_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.4747587637452304}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.44985503774934166}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/all_results_val.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.49141635235201747}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/eval_res.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/gpu_stats.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+    "memory_allocated": 460261888,
+    "max_memory_allocated": 2010146816,
+    "memory_reserved": 2988441600,
+    "max_memory_reserved": 2988441600,
+    "memory_stats": {
+        "active.all.allocated": 1923725,
+        "active.all.current": 367,
+        "active.all.freed": 1923358,
+        "active.all.peak": 627,
+        "active.large_pool.allocated": 1172005,
+        "active.large_pool.current": 77,
+        "active.large_pool.freed": 1171928,
+        "active.large_pool.peak": 237,
+        "active.small_pool.allocated": 751720,
+        "active.small_pool.current": 290,
+        "active.small_pool.freed": 751430,
+        "active.small_pool.peak": 442,
+        "active_bytes.all.allocated": 7544348726784,
+        "active_bytes.all.current": 460261888,
+        "active_bytes.all.freed": 7543888464896,
+        "active_bytes.all.peak": 2010146816,
+        "active_bytes.large_pool.allocated": 7343544013824,
+        "active_bytes.large_pool.current": 456130560,
+        "active_bytes.large_pool.freed": 7343087883264,
+        "active_bytes.large_pool.peak": 1993228288,
+        "active_bytes.small_pool.allocated": 200804712960,
+        "active_bytes.small_pool.current": 4131328,
+        "active_bytes.small_pool.freed": 200800581632,
+        "active_bytes.small_pool.peak": 57125888,
+        "allocated_bytes.all.allocated": 7544348726784,
+        "allocated_bytes.all.current": 460261888,
+        "allocated_bytes.all.freed": 7543888464896,
+        "allocated_bytes.all.peak": 2010146816,
+        "allocated_bytes.large_pool.allocated": 7343544013824,
+        "allocated_bytes.large_pool.current": 456130560,
+        "allocated_bytes.large_pool.freed": 7343087883264,
+        "allocated_bytes.large_pool.peak": 1993228288,
+        "allocated_bytes.small_pool.allocated": 200804712960,
+        "allocated_bytes.small_pool.current": 4131328,
+        "allocated_bytes.small_pool.freed": 200800581632,
+        "allocated_bytes.small_pool.peak": 57125888,
+        "allocation.all.allocated": 1923725,
+        "allocation.all.current": 367,
+        "allocation.all.freed": 1923358,
+        "allocation.all.peak": 627,
+        "allocation.large_pool.allocated": 1172005,
+        "allocation.large_pool.current": 77,
+        "allocation.large_pool.freed": 1171928,
+        "allocation.large_pool.peak": 237,
+        "allocation.small_pool.allocated": 751720,
+        "allocation.small_pool.current": 290,
+        "allocation.small_pool.freed": 751430,
+        "allocation.small_pool.peak": 442,
+        "inactive_split.all.allocated": 1110411,
+        "inactive_split.all.current": 36,
+        "inactive_split.all.freed": 1110375,
+        "inactive_split.all.peak": 93,
+        "inactive_split.large_pool.allocated": 833080,
+        "inactive_split.large_pool.current": 21,
+        "inactive_split.large_pool.freed": 833059,
+        "inactive_split.large_pool.peak": 52,
+        "inactive_split.small_pool.allocated": 277331,
+        "inactive_split.small_pool.current": 15,
+        "inactive_split.small_pool.freed": 277316,
+        "inactive_split.small_pool.peak": 68,
+        "inactive_split_bytes.all.allocated": 6850050572800,
+        "inactive_split_bytes.all.current": 59831808,
+        "inactive_split_bytes.all.freed": 6849990740992,
+        "inactive_split_bytes.all.peak": 260571648,
+        "inactive_split_bytes.large_pool.allocated": 6639523213312,
+        "inactive_split_bytes.large_pool.current": 57671680,
+        "inactive_split_bytes.large_pool.freed": 6639465541632,
+        "inactive_split_bytes.large_pool.peak": 259358720,
+        "inactive_split_bytes.small_pool.allocated": 210527359488,
+        "inactive_split_bytes.small_pool.current": 2160128,
+        "inactive_split_bytes.small_pool.freed": 210525199360,
+        "inactive_split_bytes.small_pool.peak": 41197056,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 138,
+        "num_device_free": 0,
+        "num_ooms": 0,
+        "num_sync_all_streams": 0,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 7475111813445,
+        "requested_bytes.all.current": 458600104,
+        "requested_bytes.all.freed": 7474653213341,
+        "requested_bytes.all.peak": 1991449276,
+        "requested_bytes.large_pool.allocated": 7274343557120,
+        "requested_bytes.large_pool.current": 454473728,
+        "requested_bytes.large_pool.freed": 7273889083392,
+        "requested_bytes.large_pool.peak": 1974548480,
+        "requested_bytes.small_pool.allocated": 200768256325,
+        "requested_bytes.small_pool.current": 4126376,
+        "requested_bytes.small_pool.freed": 200764129949,
+        "requested_bytes.small_pool.peak": 57108412,
+        "reserved_bytes.all.allocated": 2988441600,
+        "reserved_bytes.all.current": 2988441600,
+        "reserved_bytes.all.freed": 0,
+        "reserved_bytes.all.peak": 2988441600,
+        "reserved_bytes.large_pool.allocated": 2929721344,
+        "reserved_bytes.large_pool.current": 2929721344,
+        "reserved_bytes.large_pool.freed": 0,
+        "reserved_bytes.large_pool.peak": 2929721344,
+        "reserved_bytes.small_pool.allocated": 58720256,
+        "reserved_bytes.small_pool.current": 58720256,
+        "reserved_bytes.small_pool.freed": 0,
+        "reserved_bytes.small_pool.peak": 58720256,
+        "segment.all.allocated": 138,
+        "segment.all.current": 138,
+        "segment.all.freed": 0,
+        "segment.all.peak": 138,
+        "segment.large_pool.allocated": 110,
+        "segment.large_pool.current": 110,
+        "segment.large_pool.freed": 0,
+        "segment.large_pool.peak": 110,
+        "segment.small_pool.allocated": 28,
+        "segment.small_pool.current": 28,
+        "segment.small_pool.freed": 0,
+        "segment.small_pool.peak": 28
+    }
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/gpu_stats_la.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+    "memory_allocated": 811798528,
+    "max_memory_allocated": 3935051264,
+    "memory_reserved": 10794041344,
+    "max_memory_reserved": 10794041344,
+    "memory_stats": {
+        "active.all.allocated": 1251910,
+        "active.all.current": 735,
+        "active.all.freed": 1251175,
+        "active.all.peak": 909,
+        "active.large_pool.allocated": 248910,
+        "active.large_pool.current": 222,
+        "active.large_pool.freed": 248688,
+        "active.large_pool.peak": 356,
+        "active.small_pool.allocated": 1003000,
+        "active.small_pool.current": 513,
+        "active.small_pool.freed": 1002487,
+        "active.small_pool.peak": 673,
+        "active_bytes.all.allocated": 1606442668032,
+        "active_bytes.all.current": 811798528,
+        "active_bytes.all.freed": 1605630869504,
+        "active_bytes.all.peak": 3935051264,
+        "active_bytes.large_pool.allocated": 1568630516736,
+        "active_bytes.large_pool.current": 808615936,
+        "active_bytes.large_pool.freed": 1567821900800,
+        "active_bytes.large_pool.peak": 3930087424,
+        "active_bytes.small_pool.allocated": 37812151296,
+        "active_bytes.small_pool.current": 3182592,
+        "active_bytes.small_pool.freed": 37808968704,
+        "active_bytes.small_pool.peak": 87769600,
+        "allocated_bytes.all.allocated": 1606442668032,
+        "allocated_bytes.all.current": 811798528,
+        "allocated_bytes.all.freed": 1605630869504,
+        "allocated_bytes.all.peak": 3935051264,
+        "allocated_bytes.large_pool.allocated": 1568630516736,
+        "allocated_bytes.large_pool.current": 808615936,
+        "allocated_bytes.large_pool.freed": 1567821900800,
+        "allocated_bytes.large_pool.peak": 3930087424,
+        "allocated_bytes.small_pool.allocated": 37812151296,
+        "allocated_bytes.small_pool.current": 3182592,
+        "allocated_bytes.small_pool.freed": 37808968704,
+        "allocated_bytes.small_pool.peak": 87769600,
+        "allocation.all.allocated": 1251910,
+        "allocation.all.current": 735,
+        "allocation.all.freed": 1251175,
+        "allocation.all.peak": 909,
+        "allocation.large_pool.allocated": 248910,
+        "allocation.large_pool.current": 222,
+        "allocation.large_pool.freed": 248688,
+        "allocation.large_pool.peak": 356,
+        "allocation.small_pool.allocated": 1003000,
+        "allocation.small_pool.current": 513,
+        "allocation.small_pool.freed": 1002487,
+        "allocation.small_pool.peak": 673,
+        "inactive_split.all.allocated": 558702,
+        "inactive_split.all.current": 111,
+        "inactive_split.all.freed": 558591,
+        "inactive_split.all.peak": 211,
+        "inactive_split.large_pool.allocated": 169923,
+        "inactive_split.large_pool.current": 77,
+        "inactive_split.large_pool.freed": 169846,
+        "inactive_split.large_pool.peak": 162,
+        "inactive_split.small_pool.allocated": 388779,
+        "inactive_split.small_pool.current": 34,
+        "inactive_split.small_pool.freed": 388745,
+        "inactive_split.small_pool.peak": 84,
+        "inactive_split_bytes.all.allocated": 1352252681216,
+        "inactive_split_bytes.all.current": 465367040,
+        "inactive_split_bytes.all.freed": 1351787314176,
+        "inactive_split_bytes.all.peak": 1178222080,
+        "inactive_split_bytes.large_pool.allocated": 1313416041472,
+        "inactive_split_bytes.large_pool.current": 458063872,
+        "inactive_split_bytes.large_pool.freed": 1312957977600,
+        "inactive_split_bytes.large_pool.peak": 1171644416,
+        "inactive_split_bytes.small_pool.allocated": 38836639744,
+        "inactive_split_bytes.small_pool.current": 7303168,
+        "inactive_split_bytes.small_pool.freed": 38829336576,
+        "inactive_split_bytes.small_pool.peak": 42167296,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 732,
+        "num_device_free": 414,
+        "num_ooms": 0,
+        "num_sync_all_streams": 3,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 1590634610332,
+        "requested_bytes.all.current": 798508896,
+        "requested_bytes.all.freed": 1589836101436,
+        "requested_bytes.all.peak": 3916922592,
+        "requested_bytes.large_pool.allocated": 1553019383424,
+        "requested_bytes.large_pool.current": 795392000,
+        "requested_bytes.large_pool.freed": 1552223991424,
+        "requested_bytes.large_pool.peak": 3912022016,
+        "requested_bytes.small_pool.allocated": 37615226908,
+        "requested_bytes.small_pool.current": 3116896,
+        "requested_bytes.small_pool.freed": 37612110012,
+        "requested_bytes.small_pool.peak": 87693896,
+        "reserved_bytes.all.allocated": 24597495808,
+        "reserved_bytes.all.current": 10794041344,
+        "reserved_bytes.all.freed": 13803454464,
+        "reserved_bytes.all.peak": 10794041344,
+        "reserved_bytes.large_pool.allocated": 24339546112,
+        "reserved_bytes.large_pool.current": 10670309376,
+        "reserved_bytes.large_pool.freed": 13669236736,
+        "reserved_bytes.large_pool.peak": 10670309376,
+        "reserved_bytes.small_pool.allocated": 257949696,
+        "reserved_bytes.small_pool.current": 123731968,
+        "reserved_bytes.small_pool.freed": 134217728,
+        "reserved_bytes.small_pool.peak": 123731968,
+        "segment.all.allocated": 732,
+        "segment.all.current": 318,
+        "segment.all.freed": 414,
+        "segment.all.peak": 318,
+        "segment.large_pool.allocated": 609,
+        "segment.large_pool.current": 259,
+        "segment.large_pool.freed": 350,
+        "segment.large_pool.peak": 259,
+        "segment.small_pool.allocated": 123,
+        "segment.small_pool.current": 59,
+        "segment.small_pool.freed": 64,
+        "segment.small_pool.peak": 59
+    }
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "padding_side": "left",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/val_res.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_1999/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: bert-base-uncased
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bert-base-uncased",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "value",
+    "query"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:141ba233a966dba69f73733976f5f1fa5d3250aca7e9cd4ee25a521f2d9b9625
+size 1192672

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/all_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.5099888407051765}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/all_results_val.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_matthews_correlation": 0.49686152666715383}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/eval_res.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/gpu_stats.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+    "memory_allocated": 460261888,
+    "max_memory_allocated": 2010146816,
+    "memory_reserved": 2988441600,
+    "max_memory_reserved": 2988441600,
+    "memory_stats": {
+        "active.all.allocated": 2874441,
+        "active.all.current": 367,
+        "active.all.freed": 2874074,
+        "active.all.peak": 627,
+        "active.large_pool.allocated": 1753295,
+        "active.large_pool.current": 77,
+        "active.large_pool.freed": 1753218,
+        "active.large_pool.peak": 237,
+        "active.small_pool.allocated": 1121146,
+        "active.small_pool.current": 290,
+        "active.small_pool.freed": 1120856,
+        "active.small_pool.peak": 442,
+        "active_bytes.all.allocated": 11281343063552,
+        "active_bytes.all.current": 460261888,
+        "active_bytes.all.freed": 11280882801664,
+        "active_bytes.all.peak": 2010146816,
+        "active_bytes.large_pool.allocated": 10981747706880,
+        "active_bytes.large_pool.current": 456130560,
+        "active_bytes.large_pool.freed": 10981291576320,
+        "active_bytes.large_pool.peak": 1993228288,
+        "active_bytes.small_pool.allocated": 299595356672,
+        "active_bytes.small_pool.current": 4131328,
+        "active_bytes.small_pool.freed": 299591225344,
+        "active_bytes.small_pool.peak": 57125888,
+        "allocated_bytes.all.allocated": 11281343063552,
+        "allocated_bytes.all.current": 460261888,
+        "allocated_bytes.all.freed": 11280882801664,
+        "allocated_bytes.all.peak": 2010146816,
+        "allocated_bytes.large_pool.allocated": 10981747706880,
+        "allocated_bytes.large_pool.current": 456130560,
+        "allocated_bytes.large_pool.freed": 10981291576320,
+        "allocated_bytes.large_pool.peak": 1993228288,
+        "allocated_bytes.small_pool.allocated": 299595356672,
+        "allocated_bytes.small_pool.current": 4131328,
+        "allocated_bytes.small_pool.freed": 299591225344,
+        "allocated_bytes.small_pool.peak": 57125888,
+        "allocation.all.allocated": 2874441,
+        "allocation.all.current": 367,
+        "allocation.all.freed": 2874074,
+        "allocation.all.peak": 627,
+        "allocation.large_pool.allocated": 1753295,
+        "allocation.large_pool.current": 77,
+        "allocation.large_pool.freed": 1753218,
+        "allocation.large_pool.peak": 237,
+        "allocation.small_pool.allocated": 1121146,
+        "allocation.small_pool.current": 290,
+        "allocation.small_pool.freed": 1120856,
+        "allocation.small_pool.peak": 442,
+        "inactive_split.all.allocated": 1661624,
+        "inactive_split.all.current": 34,
+        "inactive_split.all.freed": 1661590,
+        "inactive_split.all.peak": 94,
+        "inactive_split.large_pool.allocated": 1246548,
+        "inactive_split.large_pool.current": 21,
+        "inactive_split.large_pool.freed": 1246527,
+        "inactive_split.large_pool.peak": 52,
+        "inactive_split.small_pool.allocated": 415076,
+        "inactive_split.small_pool.current": 13,
+        "inactive_split.small_pool.freed": 415063,
+        "inactive_split.small_pool.peak": 70,
+        "inactive_split_bytes.all.allocated": 10250373915648,
+        "inactive_split_bytes.all.current": 59831808,
+        "inactive_split_bytes.all.freed": 10250314083840,
+        "inactive_split_bytes.all.peak": 260571648,
+        "inactive_split_bytes.large_pool.allocated": 9936135877632,
+        "inactive_split_bytes.large_pool.current": 57671680,
+        "inactive_split_bytes.large_pool.freed": 9936078205952,
+        "inactive_split_bytes.large_pool.peak": 259358720,
+        "inactive_split_bytes.small_pool.allocated": 314238038016,
+        "inactive_split_bytes.small_pool.current": 2160128,
+        "inactive_split_bytes.small_pool.freed": 314235877888,
+        "inactive_split_bytes.small_pool.peak": 41197056,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 138,
+        "num_device_free": 0,
+        "num_ooms": 0,
+        "num_sync_all_streams": 0,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 11178178985275,
+        "requested_bytes.all.current": 458600104,
+        "requested_bytes.all.freed": 11177720385171,
+        "requested_bytes.all.peak": 1991449276,
+        "requested_bytes.large_pool.allocated": 10878636892160,
+        "requested_bytes.large_pool.current": 454473728,
+        "requested_bytes.large_pool.freed": 10878182418432,
+        "requested_bytes.large_pool.peak": 1974548480,
+        "requested_bytes.small_pool.allocated": 299542093115,
+        "requested_bytes.small_pool.current": 4126376,
+        "requested_bytes.small_pool.freed": 299537966739,
+        "requested_bytes.small_pool.peak": 57108412,
+        "reserved_bytes.all.allocated": 2988441600,
+        "reserved_bytes.all.current": 2988441600,
+        "reserved_bytes.all.freed": 0,
+        "reserved_bytes.all.peak": 2988441600,
+        "reserved_bytes.large_pool.allocated": 2929721344,
+        "reserved_bytes.large_pool.current": 2929721344,
+        "reserved_bytes.large_pool.freed": 0,
+        "reserved_bytes.large_pool.peak": 2929721344,
+        "reserved_bytes.small_pool.allocated": 58720256,
+        "reserved_bytes.small_pool.current": 58720256,
+        "reserved_bytes.small_pool.freed": 0,
+        "reserved_bytes.small_pool.peak": 58720256,
+        "segment.all.allocated": 138,
+        "segment.all.current": 138,
+        "segment.all.freed": 0,
+        "segment.all.peak": 138,
+        "segment.large_pool.allocated": 110,
+        "segment.large_pool.current": 110,
+        "segment.large_pool.freed": 0,
+        "segment.large_pool.peak": 110,
+        "segment.small_pool.allocated": 28,
+        "segment.small_pool.current": 28,
+        "segment.small_pool.freed": 0,
+        "segment.small_pool.peak": 28
+    }
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "padding_side": "left",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/val_res.json ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_2999/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_3999/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: bert-base-uncased
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42_64_10000/step_3999/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bert-base-uncased",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "value",
+    "query"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}