YZHANG32 committed on
Commit
66af879
·
verified ·
1 Parent(s): 632a7a8

Initial upload

Browse files
Files changed (30) hide show
  1. checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
  2. checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +29 -0
  3. checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
  4. checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
  5. checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
  6. checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
  7. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
  8. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_config.json +30 -0
  9. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_log.log +0 -0
  10. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
  11. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +30 -0
  12. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
  13. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
  14. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
  15. checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
  16. checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
  17. checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
  18. checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
  19. checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
  20. checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +30 -0
  21. checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
  22. checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
  23. checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +30 -0
  24. checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
  25. checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth +3 -0
  26. checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json +30 -0
  27. checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log +0 -0
  28. checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth +3 -0
  29. checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json +29 -0
  30. checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log +0 -0
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b565d802db011d4a2c09c8cbd589b2f4856975cfaec6462d72851d7f1421de
3
+ size 1975220226
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 2,
20
+ "max_num_each_cat": 200,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": false,
24
+ "use_label_enc": false,
25
+ "decoder": "Qwen/Qwen2-0.5B",
26
+ "pretrained_path": null,
27
+ "device": "cuda",
28
+ "save_dir": "checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
29
+ }
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:899e7091844b7f50440f5aa9aa04c209307f22229ff7f1c9aab519a2f89cc7d4
3
+ size 1975220226
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": true,
24
+ "use_label_enc": false,
25
+ "use_bm25": false,
26
+ "decoder": "Qwen/Qwen2-0.5B",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
30
+ }
checkpoint_dec_Qwen-Qwen2-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f729c5c3c53313af36de3f4a6d5ab8d90e1797918340feaf2ccf594fb60e90b
3
+ size 1975288322
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2_ICL",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": false,
24
+ "use_label_enc": false,
25
+ "use_bm25": true,
26
+ "decoder": "Qwen/Qwen2.5-0.5B",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False"
30
+ }
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_ICL_num_12_use_label_dec_False/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae392c1e5f93d295d81829d9ff408651277d56df5e5183267af864c497e89d35
3
+ size 1975288322
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": false,
24
+ "use_label_enc": false,
25
+ "use_bm25": true,
26
+ "decoder": "Qwen/Qwen2.5-0.5B",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
30
+ }
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a9ddc66967f95b531ba32325e18c291a3ada8256123f05bca4759892d1129a
3
+ size 1975288322
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": true,
24
+ "use_label_enc": false,
25
+ "use_bm25": true,
26
+ "decoder": "Qwen/Qwen2.5-0.5B",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
30
+ }
checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c97276b22ce7df60e04b8acd567883eebf455cddbf193d5ad83a42b9f45e99a
3
+ size 6820164300
checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": true,
24
+ "use_label_enc": false,
25
+ "use_bm25": false,
26
+ "decoder": "andrijdavid/Llama-3-1B-Base",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
30
+ }
checkpoint_dec_andrijdavid-Llama-3-1B-Base_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b155fb19d22856d1fc8413956ecf90f67aaffbbcaea5ad87a8c85f783626c2eb
3
+ size 1324928730
checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": false,
24
+ "use_label_enc": false,
25
+ "use_bm25": false,
26
+ "decoder": "facebook/opt-350m",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
30
+ }
checkpoint_dec_facebook-opt-350m_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3108b696347b7f621a07632543a7c0fe2bf9f26de2c5065c4cc06d92f2b516d
3
+ size 1419418522
checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": true,
24
+ "use_label_enc": false,
25
+ "use_bm25": false,
26
+ "decoder": "gpt2-medium",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
30
+ }
checkpoint_dec_gpt2-medium_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135e6e9c3dcb82e5444c9e947571855ee2196ff7e2ea1a166dbfe26ffcb2873a
3
+ size 497826010
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 1,
20
+ "max_num_each_cat": 4000,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": false,
24
+ "use_label_enc": false,
25
+ "use_bm25": false,
26
+ "decoder": "gpt2",
27
+ "pretrained_path": null,
28
+ "device": "cuda",
29
+ "save_dir": "checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False"
30
+ }
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_False/train_log.log ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/partial_model_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:635c07126d89947c18139a7817699748ded40a7c390c1e1e1c97d770e9edb6d7
3
+ size 497826010
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stage": "stage2",
3
+ "lr": 3e-05,
4
+ "epochs": 10,
5
+ "log_interval": 4,
6
+ "gradient_clip": 1.0,
7
+ "tr_batch_size": 4,
8
+ "te_batch_size": 4,
9
+ "gradient_accumulation_steps": 1,
10
+ "update_params": [
11
+ "all"
12
+ ],
13
+ "corpus": "infer_type",
14
+ "num_of_sents": [
15
+ 12,
16
+ 12
17
+ ],
18
+ "encoder": "bert-base-cased",
19
+ "repeat": 2,
20
+ "max_num_each_cat": 200,
21
+ "fb_mode": 0.0,
22
+ "set_loss_mask": false,
23
+ "use_label_dec": true,
24
+ "use_label_enc": false,
25
+ "decoder": "gpt2",
26
+ "pretrained_path": null,
27
+ "device": "cuda",
28
+ "save_dir": "checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True"
29
+ }
checkpoint_dec_gpt2_epochs_10_corpus_infer_type_stage_stage2_num_12_use_label_dec_True/train_log.log ADDED
The diff for this file is too large to render. See raw diff