using lfs
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +1 -0
- outputs/qqp/args.json +3 -41
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/args.json +3 -32
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/adapter_config.json +3 -32
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/all_results.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/all_results_val.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/eval_res.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/gpu_stats.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/gpu_stats_la.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/special_tokens_map.json +3 -7
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/tokenizer.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/tokenizer_config.json +3 -55
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/adapter_config.json +3 -32
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/all_results.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/all_results_la_kron_all_homo_mc_corr_1000.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/all_results_val.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/eval_res.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/gpu_stats.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/gpu_stats_la.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/special_tokens_map.json +3 -7
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/tokenizer.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/tokenizer_config.json +3 -55
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/adapter_config.json +3 -32
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/all_results.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/all_results_la_kron_all_homo_mc_corr_1000.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/all_results_val.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/eval_res.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/gpu_stats.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/gpu_stats_la.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/special_tokens_map.json +3 -7
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/tokenizer.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/tokenizer_config.json +3 -55
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/adapter_config.json +3 -32
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/all_results.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/all_results_la_kron_all_homo_mc_corr_1000.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/all_results_val.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/eval_res.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/gpu_stats.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/gpu_stats_la.json +3 -130
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/special_tokens_map.json +3 -7
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/tokenizer.json +0 -0
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/tokenizer_config.json +3 -55
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_27290/adapter_config.json +3 -32
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_27290/all_results.json +3 -1
- outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_27290/all_results_la_kron_all_homo_mc_corr_1000.json +3 -1
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
outputs/**/*.json filter=lfs diff=lfs merge=lfs -text
|
outputs/qqp/args.json
CHANGED
|
@@ -1,41 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"validation_file": null,
|
| 5 |
-
"max_length": 300,
|
| 6 |
-
"pad_to_max_length": false,
|
| 7 |
-
"model_name_or_path": "bert-base-uncased",
|
| 8 |
-
"use_slow_tokenizer": false,
|
| 9 |
-
"per_device_train_batch_size": 32,
|
| 10 |
-
"per_device_eval_batch_size": 8,
|
| 11 |
-
"learning_rate": 0.0001,
|
| 12 |
-
"weight_decay": 0.0,
|
| 13 |
-
"num_train_epochs": 3,
|
| 14 |
-
"max_train_steps": null,
|
| 15 |
-
"peft_method": null,
|
| 16 |
-
"gradient_accumulation_steps": 1,
|
| 17 |
-
"lr_scheduler_type": "linear",
|
| 18 |
-
"num_warmup_steps": 0,
|
| 19 |
-
"output_dir": "./outputs",
|
| 20 |
-
"seed": 65,
|
| 21 |
-
"push_to_hub": false,
|
| 22 |
-
"hub_model_id": null,
|
| 23 |
-
"hub_token": null,
|
| 24 |
-
"checkpointing_steps": "1000",
|
| 25 |
-
"resume_from_checkpoint": null,
|
| 26 |
-
"with_tracking": false,
|
| 27 |
-
"report_to": "all",
|
| 28 |
-
"ignore_mismatched_sizes": true,
|
| 29 |
-
"save": false,
|
| 30 |
-
"load_step": 999,
|
| 31 |
-
"lora_r": 8,
|
| 32 |
-
"lora_alpha": 16,
|
| 33 |
-
"lora_dropout": 0.1,
|
| 34 |
-
"laplace_hessian": "kron",
|
| 35 |
-
"laplace_sub": "all",
|
| 36 |
-
"laplace_prior": "homo",
|
| 37 |
-
"laplace_optim_step": 1000,
|
| 38 |
-
"testing_set": "train_val",
|
| 39 |
-
"cache_dir": "/content/cache/huggingface/metrics/",
|
| 40 |
-
"laplace_predict": "mc_corr"
|
| 41 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36f4c55f7083c703dc8ed9be8cba0336bd837b325dbcaff4f3e323dcf587d140
|
| 3 |
+
size 1152
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/args.json
CHANGED
|
@@ -1,32 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"validation_file": null,
|
| 5 |
-
"max_length": 300,
|
| 6 |
-
"pad_to_max_length": false,
|
| 7 |
-
"model_name_or_path": "bert-base-uncased",
|
| 8 |
-
"use_slow_tokenizer": false,
|
| 9 |
-
"per_device_train_batch_size": 32,
|
| 10 |
-
"per_device_eval_batch_size": 8,
|
| 11 |
-
"learning_rate": 0.0001,
|
| 12 |
-
"weight_decay": 0.0,
|
| 13 |
-
"num_train_epochs": 3,
|
| 14 |
-
"max_train_steps": null,
|
| 15 |
-
"gradient_accumulation_steps": 1,
|
| 16 |
-
"lr_scheduler_type": "linear",
|
| 17 |
-
"num_warmup_steps": 0,
|
| 18 |
-
"output_dir": "./outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65",
|
| 19 |
-
"seed": 65,
|
| 20 |
-
"push_to_hub": false,
|
| 21 |
-
"hub_model_id": null,
|
| 22 |
-
"hub_token": null,
|
| 23 |
-
"checkpointing_steps": null,
|
| 24 |
-
"resume_from_checkpoint": null,
|
| 25 |
-
"with_tracking": false,
|
| 26 |
-
"report_to": "all",
|
| 27 |
-
"ignore_mismatched_sizes": true,
|
| 28 |
-
"lora_r": 8,
|
| 29 |
-
"lora_alpha": 16,
|
| 30 |
-
"lora_dropout": 0.1,
|
| 31 |
-
"testing_set": "train_val"
|
| 32 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eabb031b73f0d5eda72a08456a5ec4273c24d831df84a3447eb559f0d8d2df5d
|
| 3 |
+
size 931
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/adapter_config.json
CHANGED
|
@@ -1,32 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"base_model_name_or_path": "bert-base-uncased",
|
| 5 |
-
"bias": "none",
|
| 6 |
-
"fan_in_fan_out": false,
|
| 7 |
-
"inference_mode": true,
|
| 8 |
-
"init_lora_weights": true,
|
| 9 |
-
"layer_replication": null,
|
| 10 |
-
"layers_pattern": null,
|
| 11 |
-
"layers_to_transform": null,
|
| 12 |
-
"loftq_config": {},
|
| 13 |
-
"lora_alpha": 16,
|
| 14 |
-
"lora_dropout": 0.1,
|
| 15 |
-
"megatron_config": null,
|
| 16 |
-
"megatron_core": "megatron.core",
|
| 17 |
-
"modules_to_save": [
|
| 18 |
-
"classifier",
|
| 19 |
-
"score"
|
| 20 |
-
],
|
| 21 |
-
"peft_type": "LORA",
|
| 22 |
-
"r": 8,
|
| 23 |
-
"rank_pattern": {},
|
| 24 |
-
"revision": null,
|
| 25 |
-
"target_modules": [
|
| 26 |
-
"value",
|
| 27 |
-
"query"
|
| 28 |
-
],
|
| 29 |
-
"task_type": "SEQ_CLS",
|
| 30 |
-
"use_dora": false,
|
| 31 |
-
"use_rslora": false
|
| 32 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a29a631d083a248a7fe8b5acc0d972f1e62158c7e9d95bdbbeffc2630802a8
|
| 3 |
+
size 668
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/all_results.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b4b1837d6e394da33a3778991c82b29ef9bfe1ee112aaa879f591170f16ea7c
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/all_results_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b4b1837d6e394da33a3778991c82b29ef9bfe1ee112aaa879f591170f16ea7c
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/all_results_val.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b267063a5554011a15c9dc7f6a42f59a6b94377288e3a9168f80dfa3105930d
|
| 3 |
+
size 69
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/eval_res.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/gpu_stats.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 648019968,
|
| 5 |
-
"max_memory_reserved": 648019968,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 5880425,
|
| 8 |
-
"active.all.current": 266,
|
| 9 |
-
"active.all.freed": 5880159,
|
| 10 |
-
"active.all.peak": 280,
|
| 11 |
-
"active.large_pool.allocated": 2290614,
|
| 12 |
-
"active.large_pool.current": 76,
|
| 13 |
-
"active.large_pool.freed": 2290538,
|
| 14 |
-
"active.large_pool.peak": 84,
|
| 15 |
-
"active.small_pool.allocated": 3589811,
|
| 16 |
-
"active.small_pool.current": 190,
|
| 17 |
-
"active.small_pool.freed": 3589621,
|
| 18 |
-
"active.small_pool.peak": 203,
|
| 19 |
-
"active_bytes.all.allocated": 7740209286144,
|
| 20 |
-
"active_bytes.all.current": 448852992,
|
| 21 |
-
"active_bytes.all.freed": 7739760433152,
|
| 22 |
-
"active_bytes.all.peak": 532900352,
|
| 23 |
-
"active_bytes.large_pool.allocated": 5868623697920,
|
| 24 |
-
"active_bytes.large_pool.current": 447086592,
|
| 25 |
-
"active_bytes.large_pool.freed": 5868176611328,
|
| 26 |
-
"active_bytes.large_pool.peak": 531067392,
|
| 27 |
-
"active_bytes.small_pool.allocated": 1871585588224,
|
| 28 |
-
"active_bytes.small_pool.current": 1766400,
|
| 29 |
-
"active_bytes.small_pool.freed": 1871583821824,
|
| 30 |
-
"active_bytes.small_pool.peak": 9081344,
|
| 31 |
-
"allocated_bytes.all.allocated": 7740209286144,
|
| 32 |
-
"allocated_bytes.all.current": 448852992,
|
| 33 |
-
"allocated_bytes.all.freed": 7739760433152,
|
| 34 |
-
"allocated_bytes.all.peak": 532900352,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 5868623697920,
|
| 36 |
-
"allocated_bytes.large_pool.current": 447086592,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 5868176611328,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 531067392,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 1871585588224,
|
| 40 |
-
"allocated_bytes.small_pool.current": 1766400,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 1871583821824,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 9081344,
|
| 43 |
-
"allocation.all.allocated": 5880425,
|
| 44 |
-
"allocation.all.current": 266,
|
| 45 |
-
"allocation.all.freed": 5880159,
|
| 46 |
-
"allocation.all.peak": 280,
|
| 47 |
-
"allocation.large_pool.allocated": 2290614,
|
| 48 |
-
"allocation.large_pool.current": 76,
|
| 49 |
-
"allocation.large_pool.freed": 2290538,
|
| 50 |
-
"allocation.large_pool.peak": 84,
|
| 51 |
-
"allocation.small_pool.allocated": 3589811,
|
| 52 |
-
"allocation.small_pool.current": 190,
|
| 53 |
-
"allocation.small_pool.freed": 3589621,
|
| 54 |
-
"allocation.small_pool.peak": 203,
|
| 55 |
-
"inactive_split.all.allocated": 3773981,
|
| 56 |
-
"inactive_split.all.current": 22,
|
| 57 |
-
"inactive_split.all.freed": 3773959,
|
| 58 |
-
"inactive_split.all.peak": 33,
|
| 59 |
-
"inactive_split.large_pool.allocated": 2090315,
|
| 60 |
-
"inactive_split.large_pool.current": 18,
|
| 61 |
-
"inactive_split.large_pool.freed": 2090297,
|
| 62 |
-
"inactive_split.large_pool.peak": 24,
|
| 63 |
-
"inactive_split.small_pool.allocated": 1683666,
|
| 64 |
-
"inactive_split.small_pool.current": 4,
|
| 65 |
-
"inactive_split.small_pool.freed": 1683662,
|
| 66 |
-
"inactive_split.small_pool.peak": 15,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 7808626842112,
|
| 68 |
-
"inactive_split_bytes.all.current": 46074880,
|
| 69 |
-
"inactive_split_bytes.all.freed": 7808580767232,
|
| 70 |
-
"inactive_split_bytes.all.peak": 90030080,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 5881975429120,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 45744128,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 5881929684992,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 89755136,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 1926651412992,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 330752,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 1926651082240,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 6580736,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 32,
|
| 82 |
-
"num_device_free": 0,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 0,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 6383968346031,
|
| 94 |
-
"requested_bytes.all.current": 447715384,
|
| 95 |
-
"requested_bytes.all.freed": 6383520630647,
|
| 96 |
-
"requested_bytes.all.peak": 531762008,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 4512820212544,
|
| 98 |
-
"requested_bytes.large_pool.current": 445954048,
|
| 99 |
-
"requested_bytes.large_pool.freed": 4512374258496,
|
| 100 |
-
"requested_bytes.large_pool.peak": 529934848,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 1871148133487,
|
| 102 |
-
"requested_bytes.small_pool.current": 1761336,
|
| 103 |
-
"requested_bytes.small_pool.freed": 1871146372151,
|
| 104 |
-
"requested_bytes.small_pool.peak": 9073624,
|
| 105 |
-
"reserved_bytes.all.allocated": 648019968,
|
| 106 |
-
"reserved_bytes.all.current": 648019968,
|
| 107 |
-
"reserved_bytes.all.freed": 0,
|
| 108 |
-
"reserved_bytes.all.peak": 648019968,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 637534208,
|
| 110 |
-
"reserved_bytes.large_pool.current": 637534208,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 0,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 637534208,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 10485760,
|
| 114 |
-
"reserved_bytes.small_pool.current": 10485760,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 0,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 10485760,
|
| 117 |
-
"segment.all.allocated": 32,
|
| 118 |
-
"segment.all.current": 32,
|
| 119 |
-
"segment.all.freed": 0,
|
| 120 |
-
"segment.all.peak": 32,
|
| 121 |
-
"segment.large_pool.allocated": 27,
|
| 122 |
-
"segment.large_pool.current": 27,
|
| 123 |
-
"segment.large_pool.freed": 0,
|
| 124 |
-
"segment.large_pool.peak": 27,
|
| 125 |
-
"segment.small_pool.allocated": 5,
|
| 126 |
-
"segment.small_pool.current": 5,
|
| 127 |
-
"segment.small_pool.freed": 0,
|
| 128 |
-
"segment.small_pool.peak": 5
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea40bb55d84163198cdbeefcabee8d1ba4814a01e59c062b2034dff472564800
|
| 3 |
+
size 6046
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/gpu_stats_la.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6358564864,
|
| 5 |
-
"max_memory_reserved": 6727663616,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 24482704,
|
| 8 |
-
"active.all.current": 10809,
|
| 9 |
-
"active.all.freed": 24471895,
|
| 10 |
-
"active.all.peak": 10985,
|
| 11 |
-
"active.large_pool.allocated": 11722181,
|
| 12 |
-
"active.large_pool.current": 222,
|
| 13 |
-
"active.large_pool.freed": 11721959,
|
| 14 |
-
"active.large_pool.peak": 360,
|
| 15 |
-
"active.small_pool.allocated": 12760523,
|
| 16 |
-
"active.small_pool.current": 10587,
|
| 17 |
-
"active.small_pool.freed": 12749936,
|
| 18 |
-
"active.small_pool.peak": 10750,
|
| 19 |
-
"active_bytes.all.allocated": 78209693356544,
|
| 20 |
-
"active_bytes.all.current": 808749568,
|
| 21 |
-
"active_bytes.all.freed": 78208884606976,
|
| 22 |
-
"active_bytes.all.peak": 5014978048,
|
| 23 |
-
"active_bytes.large_pool.allocated": 74862721955840,
|
| 24 |
-
"active_bytes.large_pool.current": 799473664,
|
| 25 |
-
"active_bytes.large_pool.freed": 74861922482176,
|
| 26 |
-
"active_bytes.large_pool.peak": 4997783552,
|
| 27 |
-
"active_bytes.small_pool.allocated": 3346971400704,
|
| 28 |
-
"active_bytes.small_pool.current": 9275904,
|
| 29 |
-
"active_bytes.small_pool.freed": 3346962124800,
|
| 30 |
-
"active_bytes.small_pool.peak": 124137472,
|
| 31 |
-
"allocated_bytes.all.allocated": 78209693356544,
|
| 32 |
-
"allocated_bytes.all.current": 808749568,
|
| 33 |
-
"allocated_bytes.all.freed": 78208884606976,
|
| 34 |
-
"allocated_bytes.all.peak": 5014978048,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 74862721955840,
|
| 36 |
-
"allocated_bytes.large_pool.current": 799473664,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 74861922482176,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 4997783552,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 3346971400704,
|
| 40 |
-
"allocated_bytes.small_pool.current": 9275904,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 3346962124800,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 124137472,
|
| 43 |
-
"allocation.all.allocated": 24482704,
|
| 44 |
-
"allocation.all.current": 10809,
|
| 45 |
-
"allocation.all.freed": 24471895,
|
| 46 |
-
"allocation.all.peak": 10985,
|
| 47 |
-
"allocation.large_pool.allocated": 11722181,
|
| 48 |
-
"allocation.large_pool.current": 222,
|
| 49 |
-
"allocation.large_pool.freed": 11721959,
|
| 50 |
-
"allocation.large_pool.peak": 360,
|
| 51 |
-
"allocation.small_pool.allocated": 12760523,
|
| 52 |
-
"allocation.small_pool.current": 10587,
|
| 53 |
-
"allocation.small_pool.freed": 12749936,
|
| 54 |
-
"allocation.small_pool.peak": 10750,
|
| 55 |
-
"inactive_split.all.allocated": 12808394,
|
| 56 |
-
"inactive_split.all.current": 115,
|
| 57 |
-
"inactive_split.all.freed": 12808279,
|
| 58 |
-
"inactive_split.all.peak": 246,
|
| 59 |
-
"inactive_split.large_pool.allocated": 8088991,
|
| 60 |
-
"inactive_split.large_pool.current": 38,
|
| 61 |
-
"inactive_split.large_pool.freed": 8088953,
|
| 62 |
-
"inactive_split.large_pool.peak": 135,
|
| 63 |
-
"inactive_split.small_pool.allocated": 4719403,
|
| 64 |
-
"inactive_split.small_pool.current": 77,
|
| 65 |
-
"inactive_split.small_pool.freed": 4719326,
|
| 66 |
-
"inactive_split.small_pool.peak": 156,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 71846063992832,
|
| 68 |
-
"inactive_split_bytes.all.current": 116094464,
|
| 69 |
-
"inactive_split_bytes.all.freed": 71845947898368,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1893863936,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 68396959819776,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 91815936,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 68396868003840,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1887404032,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 3449104173056,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 24278528,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 3449079894528,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 84646400,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 436,
|
| 82 |
-
"num_device_free": 130,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 1,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 77380438428195,
|
| 94 |
-
"requested_bytes.all.current": 800396976,
|
| 95 |
-
"requested_bytes.all.freed": 77379638031219,
|
| 96 |
-
"requested_bytes.all.peak": 4992933420,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 74034645754624,
|
| 98 |
-
"requested_bytes.large_pool.current": 795392000,
|
| 99 |
-
"requested_bytes.large_pool.freed": 74033850362624,
|
| 100 |
-
"requested_bytes.large_pool.peak": 4975769600,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 3345792673571,
|
| 102 |
-
"requested_bytes.small_pool.current": 5004976,
|
| 103 |
-
"requested_bytes.small_pool.freed": 3345787668595,
|
| 104 |
-
"requested_bytes.small_pool.peak": 119884008,
|
| 105 |
-
"reserved_bytes.all.allocated": 12182355968,
|
| 106 |
-
"reserved_bytes.all.current": 6358564864,
|
| 107 |
-
"reserved_bytes.all.freed": 5823791104,
|
| 108 |
-
"reserved_bytes.all.peak": 6727663616,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 12002000896,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6230638592,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 5771362304,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6662651904,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 180355072,
|
| 114 |
-
"reserved_bytes.small_pool.current": 127926272,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 52428800,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 127926272,
|
| 117 |
-
"segment.all.allocated": 436,
|
| 118 |
-
"segment.all.current": 306,
|
| 119 |
-
"segment.all.freed": 130,
|
| 120 |
-
"segment.all.peak": 306,
|
| 121 |
-
"segment.large_pool.allocated": 350,
|
| 122 |
-
"segment.large_pool.current": 245,
|
| 123 |
-
"segment.large_pool.freed": 105,
|
| 124 |
-
"segment.large_pool.peak": 245,
|
| 125 |
-
"segment.small_pool.allocated": 86,
|
| 126 |
-
"segment.small_pool.current": 61,
|
| 127 |
-
"segment.small_pool.freed": 25,
|
| 128 |
-
"segment.small_pool.peak": 61
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a08e54b4ea5ec71e13d699e69ca0fd0ff6279cc55eadba8a5e1b9e9bb0c3ee9f
|
| 3 |
+
size 6177
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/special_tokens_map.json
CHANGED
|
@@ -1,7 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"pad_token": "[PAD]",
|
| 5 |
-
"sep_token": "[SEP]",
|
| 6 |
-
"unk_token": "[UNK]"
|
| 7 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
|
| 3 |
+
size 125
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_0/tokenizer_config.json
CHANGED
|
@@ -1,55 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"content": "[PAD]",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"100": {
|
| 12 |
-
"content": "[UNK]",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"101": {
|
| 20 |
-
"content": "[CLS]",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"102": {
|
| 28 |
-
"content": "[SEP]",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"103": {
|
| 36 |
-
"content": "[MASK]",
|
| 37 |
-
"lstrip": false,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"clean_up_tokenization_spaces": true,
|
| 45 |
-
"cls_token": "[CLS]",
|
| 46 |
-
"do_lower_case": true,
|
| 47 |
-
"mask_token": "[MASK]",
|
| 48 |
-
"model_max_length": 512,
|
| 49 |
-
"pad_token": "[PAD]",
|
| 50 |
-
"sep_token": "[SEP]",
|
| 51 |
-
"strip_accents": null,
|
| 52 |
-
"tokenize_chinese_chars": true,
|
| 53 |
-
"tokenizer_class": "BertTokenizer",
|
| 54 |
-
"unk_token": "[UNK]"
|
| 55 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f62a57a75856b93282501c92a86f62b169997c81e93cf6f75b7cc15d6285968e
|
| 3 |
+
size 1190
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/adapter_config.json
CHANGED
|
@@ -1,32 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"base_model_name_or_path": "bert-base-uncased",
|
| 5 |
-
"bias": "none",
|
| 6 |
-
"fan_in_fan_out": false,
|
| 7 |
-
"inference_mode": true,
|
| 8 |
-
"init_lora_weights": true,
|
| 9 |
-
"layer_replication": null,
|
| 10 |
-
"layers_pattern": null,
|
| 11 |
-
"layers_to_transform": null,
|
| 12 |
-
"loftq_config": {},
|
| 13 |
-
"lora_alpha": 16,
|
| 14 |
-
"lora_dropout": 0.1,
|
| 15 |
-
"megatron_config": null,
|
| 16 |
-
"megatron_core": "megatron.core",
|
| 17 |
-
"modules_to_save": [
|
| 18 |
-
"classifier",
|
| 19 |
-
"score"
|
| 20 |
-
],
|
| 21 |
-
"peft_type": "LORA",
|
| 22 |
-
"r": 8,
|
| 23 |
-
"rank_pattern": {},
|
| 24 |
-
"revision": null,
|
| 25 |
-
"target_modules": [
|
| 26 |
-
"value",
|
| 27 |
-
"query"
|
| 28 |
-
],
|
| 29 |
-
"task_type": "SEQ_CLS",
|
| 30 |
-
"use_dora": false,
|
| 31 |
-
"use_rslora": false
|
| 32 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a29a631d083a248a7fe8b5acc0d972f1e62158c7e9d95bdbbeffc2630802a8
|
| 3 |
+
size 668
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/all_results.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8a2b532b49799110512f9cdc930cff4015cb4f79b91c0877f12cc036f3c2ce5
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/all_results_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a89c9e0a18de01aad9dd42f84630c267bec8a14c7bfa29b3c84586011cb0599
|
| 3 |
+
size 67
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/all_results_val.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d90a2c37f96886daac450e45e9fcb3f1bd8aee4676ca3e2107202575ba48506
|
| 3 |
+
size 66
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/eval_res.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/eval_res_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/gpu_stats.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6840909824,
|
| 5 |
-
"max_memory_reserved": 6840909824,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 27775346,
|
| 8 |
-
"active.all.current": 367,
|
| 9 |
-
"active.all.freed": 27774979,
|
| 10 |
-
"active.all.peak": 627,
|
| 11 |
-
"active.large_pool.allocated": 13181532,
|
| 12 |
-
"active.large_pool.current": 77,
|
| 13 |
-
"active.large_pool.freed": 13181455,
|
| 14 |
-
"active.large_pool.peak": 250,
|
| 15 |
-
"active.small_pool.allocated": 14593814,
|
| 16 |
-
"active.small_pool.current": 290,
|
| 17 |
-
"active.small_pool.freed": 14593524,
|
| 18 |
-
"active.small_pool.peak": 538,
|
| 19 |
-
"active_bytes.all.allocated": 75488457111552,
|
| 20 |
-
"active_bytes.all.current": 459737600,
|
| 21 |
-
"active_bytes.all.freed": 75487997373952,
|
| 22 |
-
"active_bytes.all.peak": 5480528896,
|
| 23 |
-
"active_bytes.large_pool.allocated": 68824605308416,
|
| 24 |
-
"active_bytes.large_pool.current": 455606272,
|
| 25 |
-
"active_bytes.large_pool.freed": 68824149702144,
|
| 26 |
-
"active_bytes.large_pool.peak": 5461108736,
|
| 27 |
-
"active_bytes.small_pool.allocated": 6663851803136,
|
| 28 |
-
"active_bytes.small_pool.current": 4131328,
|
| 29 |
-
"active_bytes.small_pool.freed": 6663847671808,
|
| 30 |
-
"active_bytes.small_pool.peak": 98207744,
|
| 31 |
-
"allocated_bytes.all.allocated": 75488457111552,
|
| 32 |
-
"allocated_bytes.all.current": 459737600,
|
| 33 |
-
"allocated_bytes.all.freed": 75487997373952,
|
| 34 |
-
"allocated_bytes.all.peak": 5480528896,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 68824605308416,
|
| 36 |
-
"allocated_bytes.large_pool.current": 455606272,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 68824149702144,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 5461108736,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 6663851803136,
|
| 40 |
-
"allocated_bytes.small_pool.current": 4131328,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 6663847671808,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 98207744,
|
| 43 |
-
"allocation.all.allocated": 27775346,
|
| 44 |
-
"allocation.all.current": 367,
|
| 45 |
-
"allocation.all.freed": 27774979,
|
| 46 |
-
"allocation.all.peak": 627,
|
| 47 |
-
"allocation.large_pool.allocated": 13181532,
|
| 48 |
-
"allocation.large_pool.current": 77,
|
| 49 |
-
"allocation.large_pool.freed": 13181455,
|
| 50 |
-
"allocation.large_pool.peak": 250,
|
| 51 |
-
"allocation.small_pool.allocated": 14593814,
|
| 52 |
-
"allocation.small_pool.current": 290,
|
| 53 |
-
"allocation.small_pool.freed": 14593524,
|
| 54 |
-
"allocation.small_pool.peak": 538,
|
| 55 |
-
"inactive_split.all.allocated": 14966847,
|
| 56 |
-
"inactive_split.all.current": 32,
|
| 57 |
-
"inactive_split.all.freed": 14966815,
|
| 58 |
-
"inactive_split.all.peak": 143,
|
| 59 |
-
"inactive_split.large_pool.allocated": 10529227,
|
| 60 |
-
"inactive_split.large_pool.current": 19,
|
| 61 |
-
"inactive_split.large_pool.freed": 10529208,
|
| 62 |
-
"inactive_split.large_pool.peak": 123,
|
| 63 |
-
"inactive_split.small_pool.allocated": 4437620,
|
| 64 |
-
"inactive_split.small_pool.current": 13,
|
| 65 |
-
"inactive_split.small_pool.freed": 4437607,
|
| 66 |
-
"inactive_split.small_pool.peak": 70,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 68752278194176,
|
| 68 |
-
"inactive_split_bytes.all.current": 64550400,
|
| 69 |
-
"inactive_split_bytes.all.freed": 68752213643776,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1129547776,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 61954103755264,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 58195968,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 61954045559296,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1127497728,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 6798174438912,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 6354432,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 6798168084480,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 43309568,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 207,
|
| 82 |
-
"num_device_free": 0,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 0,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 70973866325084,
|
| 94 |
-
"requested_bytes.all.current": 458599752,
|
| 95 |
-
"requested_bytes.all.freed": 70973407725332,
|
| 96 |
-
"requested_bytes.all.peak": 5476793900,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 64311530915008,
|
| 98 |
-
"requested_bytes.large_pool.current": 454473728,
|
| 99 |
-
"requested_bytes.large_pool.freed": 64311076441280,
|
| 100 |
-
"requested_bytes.large_pool.peak": 5457379328,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 6662335410076,
|
| 102 |
-
"requested_bytes.small_pool.current": 4126024,
|
| 103 |
-
"requested_bytes.small_pool.freed": 6662331284052,
|
| 104 |
-
"requested_bytes.small_pool.peak": 98173436,
|
| 105 |
-
"reserved_bytes.all.allocated": 6840909824,
|
| 106 |
-
"reserved_bytes.all.current": 6840909824,
|
| 107 |
-
"reserved_bytes.all.freed": 0,
|
| 108 |
-
"reserved_bytes.all.peak": 6840909824,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 6729760768,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6729760768,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 0,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6729760768,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 111149056,
|
| 114 |
-
"reserved_bytes.small_pool.current": 111149056,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 0,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 111149056,
|
| 117 |
-
"segment.all.allocated": 207,
|
| 118 |
-
"segment.all.current": 207,
|
| 119 |
-
"segment.all.freed": 0,
|
| 120 |
-
"segment.all.peak": 207,
|
| 121 |
-
"segment.large_pool.allocated": 154,
|
| 122 |
-
"segment.large_pool.current": 154,
|
| 123 |
-
"segment.large_pool.freed": 0,
|
| 124 |
-
"segment.large_pool.peak": 154,
|
| 125 |
-
"segment.small_pool.allocated": 53,
|
| 126 |
-
"segment.small_pool.current": 53,
|
| 127 |
-
"segment.small_pool.freed": 0,
|
| 128 |
-
"segment.small_pool.peak": 53
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3cacdee0e75f6018b4cb7944afd30f8210df2e43bd561b364bcd740367360c4
|
| 3 |
+
size 6120
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/gpu_stats_la.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6350176256,
|
| 5 |
-
"max_memory_reserved": 6752829440,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 73448108,
|
| 8 |
-
"active.all.current": 10809,
|
| 9 |
-
"active.all.freed": 73437299,
|
| 10 |
-
"active.all.peak": 10985,
|
| 11 |
-
"active.large_pool.allocated": 35166539,
|
| 12 |
-
"active.large_pool.current": 222,
|
| 13 |
-
"active.large_pool.freed": 35166317,
|
| 14 |
-
"active.large_pool.peak": 360,
|
| 15 |
-
"active.small_pool.allocated": 38281569,
|
| 16 |
-
"active.small_pool.current": 10587,
|
| 17 |
-
"active.small_pool.freed": 38270982,
|
| 18 |
-
"active.small_pool.peak": 10750,
|
| 19 |
-
"active_bytes.all.allocated": 234626955736064,
|
| 20 |
-
"active_bytes.all.current": 808356352,
|
| 21 |
-
"active_bytes.all.freed": 234626147379712,
|
| 22 |
-
"active_bytes.all.peak": 5015770624,
|
| 23 |
-
"active_bytes.large_pool.allocated": 224586041533952,
|
| 24 |
-
"active_bytes.large_pool.current": 799080448,
|
| 25 |
-
"active_bytes.large_pool.freed": 224585242453504,
|
| 26 |
-
"active_bytes.large_pool.peak": 4998576128,
|
| 27 |
-
"active_bytes.small_pool.allocated": 10040914202112,
|
| 28 |
-
"active_bytes.small_pool.current": 9275904,
|
| 29 |
-
"active_bytes.small_pool.freed": 10040904926208,
|
| 30 |
-
"active_bytes.small_pool.peak": 124137472,
|
| 31 |
-
"allocated_bytes.all.allocated": 234626955736064,
|
| 32 |
-
"allocated_bytes.all.current": 808356352,
|
| 33 |
-
"allocated_bytes.all.freed": 234626147379712,
|
| 34 |
-
"allocated_bytes.all.peak": 5015770624,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 224586041533952,
|
| 36 |
-
"allocated_bytes.large_pool.current": 799080448,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 224585242453504,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 4998576128,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 10040914202112,
|
| 40 |
-
"allocated_bytes.small_pool.current": 9275904,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 10040904926208,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 124137472,
|
| 43 |
-
"allocation.all.allocated": 73448108,
|
| 44 |
-
"allocation.all.current": 10809,
|
| 45 |
-
"allocation.all.freed": 73437299,
|
| 46 |
-
"allocation.all.peak": 10985,
|
| 47 |
-
"allocation.large_pool.allocated": 35166539,
|
| 48 |
-
"allocation.large_pool.current": 222,
|
| 49 |
-
"allocation.large_pool.freed": 35166317,
|
| 50 |
-
"allocation.large_pool.peak": 360,
|
| 51 |
-
"allocation.small_pool.allocated": 38281569,
|
| 52 |
-
"allocation.small_pool.current": 10587,
|
| 53 |
-
"allocation.small_pool.freed": 38270982,
|
| 54 |
-
"allocation.small_pool.peak": 10750,
|
| 55 |
-
"inactive_split.all.allocated": 38491526,
|
| 56 |
-
"inactive_split.all.current": 119,
|
| 57 |
-
"inactive_split.all.freed": 38491407,
|
| 58 |
-
"inactive_split.all.peak": 1966,
|
| 59 |
-
"inactive_split.large_pool.allocated": 24502987,
|
| 60 |
-
"inactive_split.large_pool.current": 38,
|
| 61 |
-
"inactive_split.large_pool.freed": 24502949,
|
| 62 |
-
"inactive_split.large_pool.peak": 141,
|
| 63 |
-
"inactive_split.small_pool.allocated": 13988539,
|
| 64 |
-
"inactive_split.small_pool.current": 81,
|
| 65 |
-
"inactive_split.small_pool.freed": 13988458,
|
| 66 |
-
"inactive_split.small_pool.peak": 1920,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 216296435532288,
|
| 68 |
-
"inactive_split_bytes.all.current": 120681984,
|
| 69 |
-
"inactive_split_bytes.all.freed": 216296314850304,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1893863936,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 205937432882688,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 92209152,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 205937340673536,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1887404032,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 10359002649600,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 28472832,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 10358974176768,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 84646400,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 1230,
|
| 82 |
-
"num_device_free": 924,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 5,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 232141281205865,
|
| 94 |
-
"requested_bytes.all.current": 800396976,
|
| 95 |
-
"requested_bytes.all.freed": 232140480808889,
|
| 96 |
-
"requested_bytes.all.peak": 4992933420,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 222103903185152,
|
| 98 |
-
"requested_bytes.large_pool.current": 795392000,
|
| 99 |
-
"requested_bytes.large_pool.freed": 222103107793152,
|
| 100 |
-
"requested_bytes.large_pool.peak": 4975769600,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 10037378020713,
|
| 102 |
-
"requested_bytes.small_pool.current": 5004976,
|
| 103 |
-
"requested_bytes.small_pool.freed": 10037373015737,
|
| 104 |
-
"requested_bytes.small_pool.peak": 119884008,
|
| 105 |
-
"reserved_bytes.all.allocated": 34997272576,
|
| 106 |
-
"reserved_bytes.all.current": 6350176256,
|
| 107 |
-
"reserved_bytes.all.freed": 28647096320,
|
| 108 |
-
"reserved_bytes.all.peak": 6752829440,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 34483470336,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6222249984,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 28261220352,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6687817728,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 513802240,
|
| 114 |
-
"reserved_bytes.small_pool.current": 127926272,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 385875968,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 127926272,
|
| 117 |
-
"segment.all.allocated": 1230,
|
| 118 |
-
"segment.all.current": 306,
|
| 119 |
-
"segment.all.freed": 924,
|
| 120 |
-
"segment.all.peak": 306,
|
| 121 |
-
"segment.large_pool.allocated": 985,
|
| 122 |
-
"segment.large_pool.current": 245,
|
| 123 |
-
"segment.large_pool.freed": 740,
|
| 124 |
-
"segment.large_pool.peak": 245,
|
| 125 |
-
"segment.small_pool.allocated": 245,
|
| 126 |
-
"segment.small_pool.current": 61,
|
| 127 |
-
"segment.small_pool.freed": 184,
|
| 128 |
-
"segment.small_pool.peak": 61
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7ea034a00859dede9a9b30a227018a2ab98782be37df0d1525dd5688ccd88a7
|
| 3 |
+
size 6214
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/special_tokens_map.json
CHANGED
|
@@ -1,7 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"pad_token": "[PAD]",
|
| 5 |
-
"sep_token": "[SEP]",
|
| 6 |
-
"unk_token": "[UNK]"
|
| 7 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
|
| 3 |
+
size 125
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_10915/tokenizer_config.json
CHANGED
|
@@ -1,55 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"content": "[PAD]",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"100": {
|
| 12 |
-
"content": "[UNK]",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"101": {
|
| 20 |
-
"content": "[CLS]",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"102": {
|
| 28 |
-
"content": "[SEP]",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"103": {
|
| 36 |
-
"content": "[MASK]",
|
| 37 |
-
"lstrip": false,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"clean_up_tokenization_spaces": true,
|
| 45 |
-
"cls_token": "[CLS]",
|
| 46 |
-
"do_lower_case": true,
|
| 47 |
-
"mask_token": "[MASK]",
|
| 48 |
-
"model_max_length": 512,
|
| 49 |
-
"pad_token": "[PAD]",
|
| 50 |
-
"sep_token": "[SEP]",
|
| 51 |
-
"strip_accents": null,
|
| 52 |
-
"tokenize_chinese_chars": true,
|
| 53 |
-
"tokenizer_class": "BertTokenizer",
|
| 54 |
-
"unk_token": "[UNK]"
|
| 55 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f62a57a75856b93282501c92a86f62b169997c81e93cf6f75b7cc15d6285968e
|
| 3 |
+
size 1190
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/adapter_config.json
CHANGED
|
@@ -1,32 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"base_model_name_or_path": "bert-base-uncased",
|
| 5 |
-
"bias": "none",
|
| 6 |
-
"fan_in_fan_out": false,
|
| 7 |
-
"inference_mode": true,
|
| 8 |
-
"init_lora_weights": true,
|
| 9 |
-
"layer_replication": null,
|
| 10 |
-
"layers_pattern": null,
|
| 11 |
-
"layers_to_transform": null,
|
| 12 |
-
"loftq_config": {},
|
| 13 |
-
"lora_alpha": 16,
|
| 14 |
-
"lora_dropout": 0.1,
|
| 15 |
-
"megatron_config": null,
|
| 16 |
-
"megatron_core": "megatron.core",
|
| 17 |
-
"modules_to_save": [
|
| 18 |
-
"classifier",
|
| 19 |
-
"score"
|
| 20 |
-
],
|
| 21 |
-
"peft_type": "LORA",
|
| 22 |
-
"r": 8,
|
| 23 |
-
"rank_pattern": {},
|
| 24 |
-
"revision": null,
|
| 25 |
-
"target_modules": [
|
| 26 |
-
"value",
|
| 27 |
-
"query"
|
| 28 |
-
],
|
| 29 |
-
"task_type": "SEQ_CLS",
|
| 30 |
-
"use_dora": false,
|
| 31 |
-
"use_rslora": false
|
| 32 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a29a631d083a248a7fe8b5acc0d972f1e62158c7e9d95bdbbeffc2630802a8
|
| 3 |
+
size 668
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/all_results.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b84741c4d971f9bf30ba410095ba549f669d191b2237e039c7cc4fbcd73b3e03
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/all_results_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e28e43b174cffd344dc9e04521992272a0609ab9632bab865338c0f931f1fabb
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/all_results_val.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d51821dbbe2e76ed118fdce5f3fc47b472feb5b556e10801ef544f10bd48acd
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/eval_res.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/eval_res_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/gpu_stats.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6840909824,
|
| 5 |
-
"max_memory_reserved": 6840909824,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 38723233,
|
| 8 |
-
"active.all.current": 367,
|
| 9 |
-
"active.all.freed": 38722866,
|
| 10 |
-
"active.all.peak": 627,
|
| 11 |
-
"active.large_pool.allocated": 18627179,
|
| 12 |
-
"active.large_pool.current": 77,
|
| 13 |
-
"active.large_pool.freed": 18627102,
|
| 14 |
-
"active.large_pool.peak": 250,
|
| 15 |
-
"active.small_pool.allocated": 20096054,
|
| 16 |
-
"active.small_pool.current": 290,
|
| 17 |
-
"active.small_pool.freed": 20095764,
|
| 18 |
-
"active.small_pool.peak": 538,
|
| 19 |
-
"active_bytes.all.allocated": 109319365175808,
|
| 20 |
-
"active_bytes.all.current": 459742208,
|
| 21 |
-
"active_bytes.all.freed": 109318905433600,
|
| 22 |
-
"active_bytes.all.peak": 5480528896,
|
| 23 |
-
"active_bytes.large_pool.allocated": 100259252704768,
|
| 24 |
-
"active_bytes.large_pool.current": 455606272,
|
| 25 |
-
"active_bytes.large_pool.freed": 100258797098496,
|
| 26 |
-
"active_bytes.large_pool.peak": 5461108736,
|
| 27 |
-
"active_bytes.small_pool.allocated": 9060112471040,
|
| 28 |
-
"active_bytes.small_pool.current": 4135936,
|
| 29 |
-
"active_bytes.small_pool.freed": 9060108335104,
|
| 30 |
-
"active_bytes.small_pool.peak": 98207744,
|
| 31 |
-
"allocated_bytes.all.allocated": 109319365175808,
|
| 32 |
-
"allocated_bytes.all.current": 459742208,
|
| 33 |
-
"allocated_bytes.all.freed": 109318905433600,
|
| 34 |
-
"allocated_bytes.all.peak": 5480528896,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 100259252704768,
|
| 36 |
-
"allocated_bytes.large_pool.current": 455606272,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 100258797098496,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 5461108736,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 9060112471040,
|
| 40 |
-
"allocated_bytes.small_pool.current": 4135936,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 9060108335104,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 98207744,
|
| 43 |
-
"allocation.all.allocated": 38723233,
|
| 44 |
-
"allocation.all.current": 367,
|
| 45 |
-
"allocation.all.freed": 38722866,
|
| 46 |
-
"allocation.all.peak": 627,
|
| 47 |
-
"allocation.large_pool.allocated": 18627179,
|
| 48 |
-
"allocation.large_pool.current": 77,
|
| 49 |
-
"allocation.large_pool.freed": 18627102,
|
| 50 |
-
"allocation.large_pool.peak": 250,
|
| 51 |
-
"allocation.small_pool.allocated": 20096054,
|
| 52 |
-
"allocation.small_pool.current": 290,
|
| 53 |
-
"allocation.small_pool.freed": 20095764,
|
| 54 |
-
"allocation.small_pool.peak": 538,
|
| 55 |
-
"inactive_split.all.allocated": 20542981,
|
| 56 |
-
"inactive_split.all.current": 32,
|
| 57 |
-
"inactive_split.all.freed": 20542949,
|
| 58 |
-
"inactive_split.all.peak": 143,
|
| 59 |
-
"inactive_split.large_pool.allocated": 14749135,
|
| 60 |
-
"inactive_split.large_pool.current": 19,
|
| 61 |
-
"inactive_split.large_pool.freed": 14749116,
|
| 62 |
-
"inactive_split.large_pool.peak": 126,
|
| 63 |
-
"inactive_split.small_pool.allocated": 5793846,
|
| 64 |
-
"inactive_split.small_pool.current": 13,
|
| 65 |
-
"inactive_split.small_pool.freed": 5793833,
|
| 66 |
-
"inactive_split.small_pool.peak": 70,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 99116112968192,
|
| 68 |
-
"inactive_split_bytes.all.current": 64545792,
|
| 69 |
-
"inactive_split_bytes.all.freed": 99116048422400,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1129547776,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 89881653104128,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 58195968,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 89881594908160,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1127497728,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 9234459864064,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 6349824,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 9234453514240,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 43309568,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 207,
|
| 82 |
-
"num_device_free": 0,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 0,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 103223002053941,
|
| 94 |
-
"requested_bytes.all.current": 458604360,
|
| 95 |
-
"requested_bytes.all.freed": 103222543449581,
|
| 96 |
-
"requested_bytes.all.peak": 5476793900,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 94164945048224,
|
| 98 |
-
"requested_bytes.large_pool.current": 454473728,
|
| 99 |
-
"requested_bytes.large_pool.freed": 94164490574496,
|
| 100 |
-
"requested_bytes.large_pool.peak": 5457379328,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 9058057005717,
|
| 102 |
-
"requested_bytes.small_pool.current": 4130632,
|
| 103 |
-
"requested_bytes.small_pool.freed": 9058052875085,
|
| 104 |
-
"requested_bytes.small_pool.peak": 98173436,
|
| 105 |
-
"reserved_bytes.all.allocated": 6840909824,
|
| 106 |
-
"reserved_bytes.all.current": 6840909824,
|
| 107 |
-
"reserved_bytes.all.freed": 0,
|
| 108 |
-
"reserved_bytes.all.peak": 6840909824,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 6729760768,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6729760768,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 0,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6729760768,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 111149056,
|
| 114 |
-
"reserved_bytes.small_pool.current": 111149056,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 0,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 111149056,
|
| 117 |
-
"segment.all.allocated": 207,
|
| 118 |
-
"segment.all.current": 207,
|
| 119 |
-
"segment.all.freed": 0,
|
| 120 |
-
"segment.all.peak": 207,
|
| 121 |
-
"segment.large_pool.allocated": 154,
|
| 122 |
-
"segment.large_pool.current": 154,
|
| 123 |
-
"segment.large_pool.freed": 0,
|
| 124 |
-
"segment.large_pool.peak": 154,
|
| 125 |
-
"segment.small_pool.allocated": 53,
|
| 126 |
-
"segment.small_pool.current": 53,
|
| 127 |
-
"segment.small_pool.freed": 0,
|
| 128 |
-
"segment.small_pool.peak": 53
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3c9aeec49df5b4a818d145ea556790311547ed51460e64ce052e591607b75ae
|
| 3 |
+
size 6130
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/gpu_stats_la.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6345981952,
|
| 5 |
-
"max_memory_reserved": 6752829440,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 97930810,
|
| 8 |
-
"active.all.current": 10809,
|
| 9 |
-
"active.all.freed": 97920001,
|
| 10 |
-
"active.all.peak": 10985,
|
| 11 |
-
"active.large_pool.allocated": 46888718,
|
| 12 |
-
"active.large_pool.current": 222,
|
| 13 |
-
"active.large_pool.freed": 46888496,
|
| 14 |
-
"active.large_pool.peak": 360,
|
| 15 |
-
"active.small_pool.allocated": 51042092,
|
| 16 |
-
"active.small_pool.current": 10587,
|
| 17 |
-
"active.small_pool.freed": 51031505,
|
| 18 |
-
"active.small_pool.peak": 10750,
|
| 19 |
-
"active_bytes.all.allocated": 312846567023616,
|
| 20 |
-
"active_bytes.all.current": 808552960,
|
| 21 |
-
"active_bytes.all.freed": 312845758470656,
|
| 22 |
-
"active_bytes.all.peak": 5015770624,
|
| 23 |
-
"active_bytes.large_pool.allocated": 299458681420800,
|
| 24 |
-
"active_bytes.large_pool.current": 799277056,
|
| 25 |
-
"active_bytes.large_pool.freed": 299457882143744,
|
| 26 |
-
"active_bytes.large_pool.peak": 4998576128,
|
| 27 |
-
"active_bytes.small_pool.allocated": 13387885602816,
|
| 28 |
-
"active_bytes.small_pool.current": 9275904,
|
| 29 |
-
"active_bytes.small_pool.freed": 13387876326912,
|
| 30 |
-
"active_bytes.small_pool.peak": 124137472,
|
| 31 |
-
"allocated_bytes.all.allocated": 312846567023616,
|
| 32 |
-
"allocated_bytes.all.current": 808552960,
|
| 33 |
-
"allocated_bytes.all.freed": 312845758470656,
|
| 34 |
-
"allocated_bytes.all.peak": 5015770624,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 299458681420800,
|
| 36 |
-
"allocated_bytes.large_pool.current": 799277056,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 299457882143744,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 4998576128,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 13387885602816,
|
| 40 |
-
"allocated_bytes.small_pool.current": 9275904,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 13387876326912,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 124137472,
|
| 43 |
-
"allocation.all.allocated": 97930810,
|
| 44 |
-
"allocation.all.current": 10809,
|
| 45 |
-
"allocation.all.freed": 97920001,
|
| 46 |
-
"allocation.all.peak": 10985,
|
| 47 |
-
"allocation.large_pool.allocated": 46888718,
|
| 48 |
-
"allocation.large_pool.current": 222,
|
| 49 |
-
"allocation.large_pool.freed": 46888496,
|
| 50 |
-
"allocation.large_pool.peak": 360,
|
| 51 |
-
"allocation.small_pool.allocated": 51042092,
|
| 52 |
-
"allocation.small_pool.current": 10587,
|
| 53 |
-
"allocation.small_pool.freed": 51031505,
|
| 54 |
-
"allocation.small_pool.peak": 10750,
|
| 55 |
-
"inactive_split.all.allocated": 51328722,
|
| 56 |
-
"inactive_split.all.current": 125,
|
| 57 |
-
"inactive_split.all.freed": 51328597,
|
| 58 |
-
"inactive_split.all.peak": 1966,
|
| 59 |
-
"inactive_split.large_pool.allocated": 32685617,
|
| 60 |
-
"inactive_split.large_pool.current": 43,
|
| 61 |
-
"inactive_split.large_pool.freed": 32685574,
|
| 62 |
-
"inactive_split.large_pool.peak": 141,
|
| 63 |
-
"inactive_split.small_pool.allocated": 18643105,
|
| 64 |
-
"inactive_split.small_pool.current": 82,
|
| 65 |
-
"inactive_split.small_pool.freed": 18643023,
|
| 66 |
-
"inactive_split.small_pool.peak": 1920,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 288630388610048,
|
| 68 |
-
"inactive_split_bytes.all.current": 143554048,
|
| 69 |
-
"inactive_split_bytes.all.freed": 288630245056000,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1893863936,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 274820238239232,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 121372672,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 274820116866560,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1887404032,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 13810150370816,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 22181376,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 13810128189440,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 85829632,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 1627,
|
| 82 |
-
"num_device_free": 1322,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 7,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 309521702594700,
|
| 94 |
-
"requested_bytes.all.current": 800396976,
|
| 95 |
-
"requested_bytes.all.freed": 309520902197724,
|
| 96 |
-
"requested_bytes.all.peak": 4992933420,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 296138531900416,
|
| 98 |
-
"requested_bytes.large_pool.current": 795392000,
|
| 99 |
-
"requested_bytes.large_pool.freed": 296137736508416,
|
| 100 |
-
"requested_bytes.large_pool.peak": 4975769600,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 13383170694284,
|
| 102 |
-
"requested_bytes.small_pool.current": 5004976,
|
| 103 |
-
"requested_bytes.small_pool.freed": 13383165689308,
|
| 104 |
-
"requested_bytes.small_pool.peak": 119884008,
|
| 105 |
-
"reserved_bytes.all.allocated": 46401585152,
|
| 106 |
-
"reserved_bytes.all.current": 6345981952,
|
| 107 |
-
"reserved_bytes.all.freed": 40055603200,
|
| 108 |
-
"reserved_bytes.all.peak": 6752829440,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 45720010752,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6218055680,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 39501955072,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6687817728,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 681574400,
|
| 114 |
-
"reserved_bytes.small_pool.current": 127926272,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 553648128,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 127926272,
|
| 117 |
-
"segment.all.allocated": 1627,
|
| 118 |
-
"segment.all.current": 305,
|
| 119 |
-
"segment.all.freed": 1322,
|
| 120 |
-
"segment.all.peak": 306,
|
| 121 |
-
"segment.large_pool.allocated": 1302,
|
| 122 |
-
"segment.large_pool.current": 244,
|
| 123 |
-
"segment.large_pool.freed": 1058,
|
| 124 |
-
"segment.large_pool.peak": 245,
|
| 125 |
-
"segment.small_pool.allocated": 325,
|
| 126 |
-
"segment.small_pool.current": 61,
|
| 127 |
-
"segment.small_pool.freed": 264,
|
| 128 |
-
"segment.small_pool.peak": 61
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60850e6b893473f88e3421e4e2d6cf85cab0ecb9d65f19e813668eb10e67e80e
|
| 3 |
+
size 6219
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/special_tokens_map.json
CHANGED
|
@@ -1,7 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"pad_token": "[PAD]",
|
| 5 |
-
"sep_token": "[SEP]",
|
| 6 |
-
"unk_token": "[UNK]"
|
| 7 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
|
| 3 |
+
size 125
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_16373/tokenizer_config.json
CHANGED
|
@@ -1,55 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"content": "[PAD]",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"100": {
|
| 12 |
-
"content": "[UNK]",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"101": {
|
| 20 |
-
"content": "[CLS]",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"102": {
|
| 28 |
-
"content": "[SEP]",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"103": {
|
| 36 |
-
"content": "[MASK]",
|
| 37 |
-
"lstrip": false,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"clean_up_tokenization_spaces": true,
|
| 45 |
-
"cls_token": "[CLS]",
|
| 46 |
-
"do_lower_case": true,
|
| 47 |
-
"mask_token": "[MASK]",
|
| 48 |
-
"model_max_length": 512,
|
| 49 |
-
"pad_token": "[PAD]",
|
| 50 |
-
"sep_token": "[SEP]",
|
| 51 |
-
"strip_accents": null,
|
| 52 |
-
"tokenize_chinese_chars": true,
|
| 53 |
-
"tokenizer_class": "BertTokenizer",
|
| 54 |
-
"unk_token": "[UNK]"
|
| 55 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f62a57a75856b93282501c92a86f62b169997c81e93cf6f75b7cc15d6285968e
|
| 3 |
+
size 1190
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/adapter_config.json
CHANGED
|
@@ -1,32 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"base_model_name_or_path": "bert-base-uncased",
|
| 5 |
-
"bias": "none",
|
| 6 |
-
"fan_in_fan_out": false,
|
| 7 |
-
"inference_mode": true,
|
| 8 |
-
"init_lora_weights": true,
|
| 9 |
-
"layer_replication": null,
|
| 10 |
-
"layers_pattern": null,
|
| 11 |
-
"layers_to_transform": null,
|
| 12 |
-
"loftq_config": {},
|
| 13 |
-
"lora_alpha": 16,
|
| 14 |
-
"lora_dropout": 0.1,
|
| 15 |
-
"megatron_config": null,
|
| 16 |
-
"megatron_core": "megatron.core",
|
| 17 |
-
"modules_to_save": [
|
| 18 |
-
"classifier",
|
| 19 |
-
"score"
|
| 20 |
-
],
|
| 21 |
-
"peft_type": "LORA",
|
| 22 |
-
"r": 8,
|
| 23 |
-
"rank_pattern": {},
|
| 24 |
-
"revision": null,
|
| 25 |
-
"target_modules": [
|
| 26 |
-
"value",
|
| 27 |
-
"query"
|
| 28 |
-
],
|
| 29 |
-
"task_type": "SEQ_CLS",
|
| 30 |
-
"use_dora": false,
|
| 31 |
-
"use_rslora": false
|
| 32 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a29a631d083a248a7fe8b5acc0d972f1e62158c7e9d95bdbbeffc2630802a8
|
| 3 |
+
size 668
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/all_results.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ccf67cb852c7608079eea2c8cd7bc80f05b2eca0923e5509838443f6254c8e5
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/all_results_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ccf67cb852c7608079eea2c8cd7bc80f05b2eca0923e5509838443f6254c8e5
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/all_results_val.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6d52a14bd194a36b84555b9e5d162c67d7c2643e03a76d01b432cd519b7741a
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/eval_res.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/eval_res_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/gpu_stats.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6840909824,
|
| 5 |
-
"max_memory_reserved": 6840909824,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 49671300,
|
| 8 |
-
"active.all.current": 367,
|
| 9 |
-
"active.all.freed": 49670933,
|
| 10 |
-
"active.all.peak": 627,
|
| 11 |
-
"active.large_pool.allocated": 24072103,
|
| 12 |
-
"active.large_pool.current": 77,
|
| 13 |
-
"active.large_pool.freed": 24072026,
|
| 14 |
-
"active.large_pool.peak": 250,
|
| 15 |
-
"active.small_pool.allocated": 25599197,
|
| 16 |
-
"active.small_pool.current": 290,
|
| 17 |
-
"active.small_pool.freed": 25598907,
|
| 18 |
-
"active.small_pool.peak": 538,
|
| 19 |
-
"active_bytes.all.allocated": 143211789438464,
|
| 20 |
-
"active_bytes.all.current": 459742208,
|
| 21 |
-
"active_bytes.all.freed": 143211329696256,
|
| 22 |
-
"active_bytes.all.peak": 5480528896,
|
| 23 |
-
"active_bytes.large_pool.allocated": 131754566969344,
|
| 24 |
-
"active_bytes.large_pool.current": 455606272,
|
| 25 |
-
"active_bytes.large_pool.freed": 131754111363072,
|
| 26 |
-
"active_bytes.large_pool.peak": 5461108736,
|
| 27 |
-
"active_bytes.small_pool.allocated": 11457222469120,
|
| 28 |
-
"active_bytes.small_pool.current": 4135936,
|
| 29 |
-
"active_bytes.small_pool.freed": 11457218333184,
|
| 30 |
-
"active_bytes.small_pool.peak": 98207744,
|
| 31 |
-
"allocated_bytes.all.allocated": 143211789438464,
|
| 32 |
-
"allocated_bytes.all.current": 459742208,
|
| 33 |
-
"allocated_bytes.all.freed": 143211329696256,
|
| 34 |
-
"allocated_bytes.all.peak": 5480528896,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 131754566969344,
|
| 36 |
-
"allocated_bytes.large_pool.current": 455606272,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 131754111363072,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 5461108736,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 11457222469120,
|
| 40 |
-
"allocated_bytes.small_pool.current": 4135936,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 11457218333184,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 98207744,
|
| 43 |
-
"allocation.all.allocated": 49671300,
|
| 44 |
-
"allocation.all.current": 367,
|
| 45 |
-
"allocation.all.freed": 49670933,
|
| 46 |
-
"allocation.all.peak": 627,
|
| 47 |
-
"allocation.large_pool.allocated": 24072103,
|
| 48 |
-
"allocation.large_pool.current": 77,
|
| 49 |
-
"allocation.large_pool.freed": 24072026,
|
| 50 |
-
"allocation.large_pool.peak": 250,
|
| 51 |
-
"allocation.small_pool.allocated": 25599197,
|
| 52 |
-
"allocation.small_pool.current": 290,
|
| 53 |
-
"allocation.small_pool.freed": 25598907,
|
| 54 |
-
"allocation.small_pool.peak": 538,
|
| 55 |
-
"inactive_split.all.allocated": 26126377,
|
| 56 |
-
"inactive_split.all.current": 33,
|
| 57 |
-
"inactive_split.all.freed": 26126344,
|
| 58 |
-
"inactive_split.all.peak": 143,
|
| 59 |
-
"inactive_split.large_pool.allocated": 18969295,
|
| 60 |
-
"inactive_split.large_pool.current": 19,
|
| 61 |
-
"inactive_split.large_pool.freed": 18969276,
|
| 62 |
-
"inactive_split.large_pool.peak": 127,
|
| 63 |
-
"inactive_split.small_pool.allocated": 7157082,
|
| 64 |
-
"inactive_split.small_pool.current": 14,
|
| 65 |
-
"inactive_split.small_pool.freed": 7157068,
|
| 66 |
-
"inactive_split.small_pool.peak": 70,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 129491747222528,
|
| 68 |
-
"inactive_split_bytes.all.current": 64545792,
|
| 69 |
-
"inactive_split_bytes.all.freed": 129491682676736,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1129547776,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 117820220749824,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 58195968,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 117820162553856,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1127497728,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 11671526472704,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 6349824,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 11671520122880,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 43309568,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 207,
|
| 82 |
-
"num_device_free": 0,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 0,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 135538096736142,
|
| 94 |
-
"requested_bytes.all.current": 458603592,
|
| 95 |
-
"requested_bytes.all.freed": 135537638132550,
|
| 96 |
-
"requested_bytes.all.peak": 5476793900,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 124083469228256,
|
| 98 |
-
"requested_bytes.large_pool.current": 454473728,
|
| 99 |
-
"requested_bytes.large_pool.freed": 124083014754528,
|
| 100 |
-
"requested_bytes.large_pool.peak": 5457379328,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 11454627507886,
|
| 102 |
-
"requested_bytes.small_pool.current": 4129864,
|
| 103 |
-
"requested_bytes.small_pool.freed": 11454623378022,
|
| 104 |
-
"requested_bytes.small_pool.peak": 98173436,
|
| 105 |
-
"reserved_bytes.all.allocated": 6840909824,
|
| 106 |
-
"reserved_bytes.all.current": 6840909824,
|
| 107 |
-
"reserved_bytes.all.freed": 0,
|
| 108 |
-
"reserved_bytes.all.peak": 6840909824,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 6729760768,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6729760768,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 0,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6729760768,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 111149056,
|
| 114 |
-
"reserved_bytes.small_pool.current": 111149056,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 0,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 111149056,
|
| 117 |
-
"segment.all.allocated": 207,
|
| 118 |
-
"segment.all.current": 207,
|
| 119 |
-
"segment.all.freed": 0,
|
| 120 |
-
"segment.all.peak": 207,
|
| 121 |
-
"segment.large_pool.allocated": 154,
|
| 122 |
-
"segment.large_pool.current": 154,
|
| 123 |
-
"segment.large_pool.freed": 0,
|
| 124 |
-
"segment.large_pool.peak": 154,
|
| 125 |
-
"segment.small_pool.allocated": 53,
|
| 126 |
-
"segment.small_pool.current": 53,
|
| 127 |
-
"segment.small_pool.freed": 0,
|
| 128 |
-
"segment.small_pool.peak": 53
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26b30c08de5574b5ef27faf351180ea0a5644d8627266c3e5329f25addb63944
|
| 3 |
+
size 6144
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/gpu_stats_la.json
CHANGED
|
@@ -1,130 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"memory_reserved": 6350176256,
|
| 5 |
-
"max_memory_reserved": 6752829440,
|
| 6 |
-
"memory_stats": {
|
| 7 |
-
"active.all.allocated": 122413512,
|
| 8 |
-
"active.all.current": 10809,
|
| 9 |
-
"active.all.freed": 122402703,
|
| 10 |
-
"active.all.peak": 10985,
|
| 11 |
-
"active.large_pool.allocated": 58610897,
|
| 12 |
-
"active.large_pool.current": 222,
|
| 13 |
-
"active.large_pool.freed": 58610675,
|
| 14 |
-
"active.large_pool.peak": 360,
|
| 15 |
-
"active.small_pool.allocated": 63802615,
|
| 16 |
-
"active.small_pool.current": 10587,
|
| 17 |
-
"active.small_pool.freed": 63792028,
|
| 18 |
-
"active.small_pool.peak": 10750,
|
| 19 |
-
"active_bytes.all.allocated": 391045242961408,
|
| 20 |
-
"active_bytes.all.current": 808356352,
|
| 21 |
-
"active_bytes.all.freed": 391044434605056,
|
| 22 |
-
"active_bytes.all.peak": 5015770624,
|
| 23 |
-
"active_bytes.large_pool.allocated": 374310385957888,
|
| 24 |
-
"active_bytes.large_pool.current": 799080448,
|
| 25 |
-
"active_bytes.large_pool.freed": 374309586877440,
|
| 26 |
-
"active_bytes.large_pool.peak": 4998576128,
|
| 27 |
-
"active_bytes.small_pool.allocated": 16734857003520,
|
| 28 |
-
"active_bytes.small_pool.current": 9275904,
|
| 29 |
-
"active_bytes.small_pool.freed": 16734847727616,
|
| 30 |
-
"active_bytes.small_pool.peak": 124137472,
|
| 31 |
-
"allocated_bytes.all.allocated": 391045242961408,
|
| 32 |
-
"allocated_bytes.all.current": 808356352,
|
| 33 |
-
"allocated_bytes.all.freed": 391044434605056,
|
| 34 |
-
"allocated_bytes.all.peak": 5015770624,
|
| 35 |
-
"allocated_bytes.large_pool.allocated": 374310385957888,
|
| 36 |
-
"allocated_bytes.large_pool.current": 799080448,
|
| 37 |
-
"allocated_bytes.large_pool.freed": 374309586877440,
|
| 38 |
-
"allocated_bytes.large_pool.peak": 4998576128,
|
| 39 |
-
"allocated_bytes.small_pool.allocated": 16734857003520,
|
| 40 |
-
"allocated_bytes.small_pool.current": 9275904,
|
| 41 |
-
"allocated_bytes.small_pool.freed": 16734847727616,
|
| 42 |
-
"allocated_bytes.small_pool.peak": 124137472,
|
| 43 |
-
"allocation.all.allocated": 122413512,
|
| 44 |
-
"allocation.all.current": 10809,
|
| 45 |
-
"allocation.all.freed": 122402703,
|
| 46 |
-
"allocation.all.peak": 10985,
|
| 47 |
-
"allocation.large_pool.allocated": 58610897,
|
| 48 |
-
"allocation.large_pool.current": 222,
|
| 49 |
-
"allocation.large_pool.freed": 58610675,
|
| 50 |
-
"allocation.large_pool.peak": 360,
|
| 51 |
-
"allocation.small_pool.allocated": 63802615,
|
| 52 |
-
"allocation.small_pool.current": 10587,
|
| 53 |
-
"allocation.small_pool.freed": 63792028,
|
| 54 |
-
"allocation.small_pool.peak": 10750,
|
| 55 |
-
"inactive_split.all.allocated": 64144648,
|
| 56 |
-
"inactive_split.all.current": 110,
|
| 57 |
-
"inactive_split.all.freed": 64144538,
|
| 58 |
-
"inactive_split.all.peak": 1980,
|
| 59 |
-
"inactive_split.large_pool.allocated": 40914502,
|
| 60 |
-
"inactive_split.large_pool.current": 38,
|
| 61 |
-
"inactive_split.large_pool.freed": 40914464,
|
| 62 |
-
"inactive_split.large_pool.peak": 141,
|
| 63 |
-
"inactive_split.small_pool.allocated": 23230146,
|
| 64 |
-
"inactive_split.small_pool.current": 72,
|
| 65 |
-
"inactive_split.small_pool.freed": 23230074,
|
| 66 |
-
"inactive_split.small_pool.peak": 1934,
|
| 67 |
-
"inactive_split_bytes.all.allocated": 360737834738688,
|
| 68 |
-
"inactive_split_bytes.all.current": 120681984,
|
| 69 |
-
"inactive_split_bytes.all.freed": 360737714056704,
|
| 70 |
-
"inactive_split_bytes.all.peak": 1893863936,
|
| 71 |
-
"inactive_split_bytes.large_pool.allocated": 343478534163456,
|
| 72 |
-
"inactive_split_bytes.large_pool.current": 92209152,
|
| 73 |
-
"inactive_split_bytes.large_pool.freed": 343478441954304,
|
| 74 |
-
"inactive_split_bytes.large_pool.peak": 1887404032,
|
| 75 |
-
"inactive_split_bytes.small_pool.allocated": 17259300575232,
|
| 76 |
-
"inactive_split_bytes.small_pool.current": 28472832,
|
| 77 |
-
"inactive_split_bytes.small_pool.freed": 17259272102400,
|
| 78 |
-
"inactive_split_bytes.small_pool.peak": 85829632,
|
| 79 |
-
"max_split_size": -1,
|
| 80 |
-
"num_alloc_retries": 0,
|
| 81 |
-
"num_device_alloc": 2025,
|
| 82 |
-
"num_device_free": 1719,
|
| 83 |
-
"num_ooms": 0,
|
| 84 |
-
"num_sync_all_streams": 9,
|
| 85 |
-
"oversize_allocations.allocated": 0,
|
| 86 |
-
"oversize_allocations.current": 0,
|
| 87 |
-
"oversize_allocations.freed": 0,
|
| 88 |
-
"oversize_allocations.peak": 0,
|
| 89 |
-
"oversize_segments.allocated": 0,
|
| 90 |
-
"oversize_segments.current": 0,
|
| 91 |
-
"oversize_segments.freed": 0,
|
| 92 |
-
"oversize_segments.peak": 0,
|
| 93 |
-
"requested_bytes.all.allocated": 386902123983535,
|
| 94 |
-
"requested_bytes.all.current": 800396976,
|
| 95 |
-
"requested_bytes.all.freed": 386901323586559,
|
| 96 |
-
"requested_bytes.all.peak": 4992933420,
|
| 97 |
-
"requested_bytes.large_pool.allocated": 370173160615680,
|
| 98 |
-
"requested_bytes.large_pool.current": 795392000,
|
| 99 |
-
"requested_bytes.large_pool.freed": 370172365223680,
|
| 100 |
-
"requested_bytes.large_pool.peak": 4975769600,
|
| 101 |
-
"requested_bytes.small_pool.allocated": 16728963367855,
|
| 102 |
-
"requested_bytes.small_pool.current": 5004976,
|
| 103 |
-
"requested_bytes.small_pool.freed": 16728958362879,
|
| 104 |
-
"requested_bytes.small_pool.peak": 119884008,
|
| 105 |
-
"reserved_bytes.all.allocated": 57814286336,
|
| 106 |
-
"reserved_bytes.all.current": 6350176256,
|
| 107 |
-
"reserved_bytes.all.freed": 51464110080,
|
| 108 |
-
"reserved_bytes.all.peak": 6752829440,
|
| 109 |
-
"reserved_bytes.large_pool.allocated": 56964939776,
|
| 110 |
-
"reserved_bytes.large_pool.current": 6222249984,
|
| 111 |
-
"reserved_bytes.large_pool.freed": 50742689792,
|
| 112 |
-
"reserved_bytes.large_pool.peak": 6687817728,
|
| 113 |
-
"reserved_bytes.small_pool.allocated": 849346560,
|
| 114 |
-
"reserved_bytes.small_pool.current": 127926272,
|
| 115 |
-
"reserved_bytes.small_pool.freed": 721420288,
|
| 116 |
-
"reserved_bytes.small_pool.peak": 127926272,
|
| 117 |
-
"segment.all.allocated": 2025,
|
| 118 |
-
"segment.all.current": 306,
|
| 119 |
-
"segment.all.freed": 1719,
|
| 120 |
-
"segment.all.peak": 306,
|
| 121 |
-
"segment.large_pool.allocated": 1620,
|
| 122 |
-
"segment.large_pool.current": 245,
|
| 123 |
-
"segment.large_pool.freed": 1375,
|
| 124 |
-
"segment.large_pool.peak": 245,
|
| 125 |
-
"segment.small_pool.allocated": 405,
|
| 126 |
-
"segment.small_pool.current": 61,
|
| 127 |
-
"segment.small_pool.freed": 344,
|
| 128 |
-
"segment.small_pool.peak": 61
|
| 129 |
-
}
|
| 130 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fcd4a2ef2bb96bfd342772ad610ebbc7979c09468fa057fb1f25e0939ceb08b
|
| 3 |
+
size 6222
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/special_tokens_map.json
CHANGED
|
@@ -1,7 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"pad_token": "[PAD]",
|
| 5 |
-
"sep_token": "[SEP]",
|
| 6 |
-
"unk_token": "[UNK]"
|
| 7 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
|
| 3 |
+
size 125
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_21831/tokenizer_config.json
CHANGED
|
@@ -1,55 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"content": "[PAD]",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"100": {
|
| 12 |
-
"content": "[UNK]",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"101": {
|
| 20 |
-
"content": "[CLS]",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"102": {
|
| 28 |
-
"content": "[SEP]",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"103": {
|
| 36 |
-
"content": "[MASK]",
|
| 37 |
-
"lstrip": false,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"clean_up_tokenization_spaces": true,
|
| 45 |
-
"cls_token": "[CLS]",
|
| 46 |
-
"do_lower_case": true,
|
| 47 |
-
"mask_token": "[MASK]",
|
| 48 |
-
"model_max_length": 512,
|
| 49 |
-
"pad_token": "[PAD]",
|
| 50 |
-
"sep_token": "[SEP]",
|
| 51 |
-
"strip_accents": null,
|
| 52 |
-
"tokenize_chinese_chars": true,
|
| 53 |
-
"tokenizer_class": "BertTokenizer",
|
| 54 |
-
"unk_token": "[UNK]"
|
| 55 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f62a57a75856b93282501c92a86f62b169997c81e93cf6f75b7cc15d6285968e
|
| 3 |
+
size 1190
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_27290/adapter_config.json
CHANGED
|
@@ -1,32 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"base_model_name_or_path": "bert-base-uncased",
|
| 5 |
-
"bias": "none",
|
| 6 |
-
"fan_in_fan_out": false,
|
| 7 |
-
"inference_mode": true,
|
| 8 |
-
"init_lora_weights": true,
|
| 9 |
-
"layer_replication": null,
|
| 10 |
-
"layers_pattern": null,
|
| 11 |
-
"layers_to_transform": null,
|
| 12 |
-
"loftq_config": {},
|
| 13 |
-
"lora_alpha": 16,
|
| 14 |
-
"lora_dropout": 0.1,
|
| 15 |
-
"megatron_config": null,
|
| 16 |
-
"megatron_core": "megatron.core",
|
| 17 |
-
"modules_to_save": [
|
| 18 |
-
"classifier",
|
| 19 |
-
"score"
|
| 20 |
-
],
|
| 21 |
-
"peft_type": "LORA",
|
| 22 |
-
"r": 8,
|
| 23 |
-
"rank_pattern": {},
|
| 24 |
-
"revision": null,
|
| 25 |
-
"target_modules": [
|
| 26 |
-
"value",
|
| 27 |
-
"query"
|
| 28 |
-
],
|
| 29 |
-
"task_type": "SEQ_CLS",
|
| 30 |
-
"use_dora": false,
|
| 31 |
-
"use_rslora": false
|
| 32 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a29a631d083a248a7fe8b5acc0d972f1e62158c7e9d95bdbbeffc2630802a8
|
| 3 |
+
size 668
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_27290/all_results.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc643f884b586fc34176513468477242eae688c5736d12a8fb5978b2db4685bd
|
| 3 |
+
size 68
|
outputs/qqp/bert-base-uncased_loratrain_val_8_16_0.1_0.0001_65/step_27290/all_results_la_kron_all_homo_mc_corr_1000.json
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc5cc5223d349f01abe3cc3ffb4d04d5bede67d7003918482f511d6342c75c13
|
| 3 |
+
size 68
|