Initial upload
Browse files- checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
- checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +29 -0
- checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
- checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
- checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
- checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_config.json +30 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_log.log +0 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +30 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
- checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
- checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
- checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
- checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
- checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
- checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +30 -0
- checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
- checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
- checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
- checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
- checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
- checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +30 -0
- checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
- checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
- checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +29 -0
- checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38b565d802db011d4a2c09c8cbd589b2f4856975cfaec6462d72851d7f1421de
|
| 3 |
+
size 1975220226
|
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 2,
|
| 20 |
+
"max_num_each_cat": 200,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": false,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"decoder": "Qwen/Qwen2-0.5B",
|
| 26 |
+
"pretrained_path": null,
|
| 27 |
+
"device": "cuda",
|
| 28 |
+
"save_dir": "checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
|
| 29 |
+
}
|
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:899e7091844b7f50440f5aa9aa04c209307f22229ff7f1c9aab519a2f89cc7d4
|
| 3 |
+
size 1975220226
|
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": true,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": false,
|
| 26 |
+
"decoder": "Qwen/Qwen2-0.5B",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
|
| 30 |
+
}
|
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f729c5c3c53313af36de3f4a6d5ab8d90e1797918340feaf2ccf594fb60e90b
|
| 3 |
+
size 1975288322
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2_ICL",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": false,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": true,
|
| 26 |
+
"decoder": "Qwen/Qwen2.5-0.5B",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False"
|
| 30 |
+
}
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae392c1e5f93d295d81829d9ff408651277d56df5e5183267af864c497e89d35
|
| 3 |
+
size 1975288322
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": false,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": true,
|
| 26 |
+
"decoder": "Qwen/Qwen2.5-0.5B",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
|
| 30 |
+
}
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04a9ddc66967f95b531ba32325e18c291a3ada8256123f05bca4759892d1129a
|
| 3 |
+
size 1975288322
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": true,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": true,
|
| 26 |
+
"decoder": "Qwen/Qwen2.5-0.5B",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
|
| 30 |
+
}
|
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c97276b22ce7df60e04b8acd567883eebf455cddbf193d5ad83a42b9f45e99a
|
| 3 |
+
size 6820164300
|
checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": true,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": false,
|
| 26 |
+
"decoder": "andrijdavid/Llama-3-1B-Base",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
|
| 30 |
+
}
|
checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b155fb19d22856d1fc8413956ecf90f67aaffbbcaea5ad87a8c85f783626c2eb
|
| 3 |
+
size 1324928730
|
checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": false,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": false,
|
| 26 |
+
"decoder": "facebook/opt-350m",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
|
| 30 |
+
}
|
checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3108b696347b7f621a07632543a7c0fe2bf9f26de2c5065c4cc06d92f2b516d
|
| 3 |
+
size 1419418522
|
checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": true,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": false,
|
| 26 |
+
"decoder": "gpt2-medium",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
|
| 30 |
+
}
|
checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:135e6e9c3dcb82e5444c9e947571855ee2196ff7e2ea1a166dbfe26ffcb2873a
|
| 3 |
+
size 497826010
|
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 1,
|
| 20 |
+
"max_num_each_cat": 4000,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": false,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"use_bm25": false,
|
| 26 |
+
"decoder": "gpt2",
|
| 27 |
+
"pretrained_path": null,
|
| 28 |
+
"device": "cuda",
|
| 29 |
+
"save_dir": "checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
|
| 30 |
+
}
|
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:635c07126d89947c18139a7817699748ded40a7c390c1e1e1c97d770e9edb6d7
|
| 3 |
+
size 497826010
|
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"stage": "stage2",
|
| 3 |
+
"lr": 3e-05,
|
| 4 |
+
"epochs": 10,
|
| 5 |
+
"log_interval": 4,
|
| 6 |
+
"gradient_clip": 1.0,
|
| 7 |
+
"tr_batch_size": 4,
|
| 8 |
+
"te_batch_size": 4,
|
| 9 |
+
"gradient_accumulation_steps": 1,
|
| 10 |
+
"update_params": [
|
| 11 |
+
"all"
|
| 12 |
+
],
|
| 13 |
+
"corpus": "infer_type",
|
| 14 |
+
"num_of_sents": [
|
| 15 |
+
12,
|
| 16 |
+
12
|
| 17 |
+
],
|
| 18 |
+
"encoder": "bert-base-cased",
|
| 19 |
+
"repeat": 2,
|
| 20 |
+
"max_num_each_cat": 200,
|
| 21 |
+
"fb_mode": 0.0,
|
| 22 |
+
"set_loss_mask": false,
|
| 23 |
+
"use_label_dec": true,
|
| 24 |
+
"use_label_enc": false,
|
| 25 |
+
"decoder": "gpt2",
|
| 26 |
+
"pretrained_path": null,
|
| 27 |
+
"device": "cuda",
|
| 28 |
+
"save_dir": "checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
|
| 29 |
+
}
|
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|