maazfarooqui committed on
Commit
303c783
·
1 Parent(s): 2721274

removed the directory

Browse files
Files changed (31) hide show
  1. bart_large_model/checkpoint-1000/config.json +0 -73
  2. bart_large_model/checkpoint-1000/generation_config.json +0 -13
  3. bart_large_model/checkpoint-1000/merges.txt +0 -0
  4. bart_large_model/checkpoint-1000/model.safetensors +0 -3
  5. bart_large_model/checkpoint-1000/optimizer.pt +0 -3
  6. bart_large_model/checkpoint-1000/rng_state.pth +0 -3
  7. bart_large_model/checkpoint-1000/scheduler.pt +0 -3
  8. bart_large_model/checkpoint-1000/special_tokens_map.json +0 -51
  9. bart_large_model/checkpoint-1000/tokenizer_config.json +0 -56
  10. bart_large_model/checkpoint-1000/trainer_state.json +0 -183
  11. bart_large_model/checkpoint-1000/training_args.bin +0 -3
  12. bart_large_model/checkpoint-1000/vocab.json +0 -0
  13. bart_large_model/checkpoint-900/config.json +0 -73
  14. bart_large_model/checkpoint-900/generation_config.json +0 -13
  15. bart_large_model/checkpoint-900/merges.txt +0 -0
  16. bart_large_model/checkpoint-900/model.safetensors +0 -3
  17. bart_large_model/checkpoint-900/optimizer.pt +0 -3
  18. bart_large_model/checkpoint-900/rng_state.pth +0 -3
  19. bart_large_model/checkpoint-900/scheduler.pt +0 -3
  20. bart_large_model/checkpoint-900/special_tokens_map.json +0 -51
  21. bart_large_model/checkpoint-900/tokenizer_config.json +0 -56
  22. bart_large_model/checkpoint-900/trainer_state.json +0 -168
  23. bart_large_model/checkpoint-900/training_args.bin +0 -3
  24. bart_large_model/checkpoint-900/vocab.json +0 -0
  25. bart_large_model/config.json +0 -73
  26. bart_large_model/generation_config.json +0 -13
  27. bart_large_model/merges.txt +0 -0
  28. bart_large_model/model.safetensors +0 -3
  29. bart_large_model/special_tokens_map.json +0 -51
  30. bart_large_model/tokenizer_config.json +0 -56
  31. bart_large_model/vocab.json +0 -0
bart_large_model/checkpoint-1000/config.json DELETED
@@ -1,73 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/bart-large",
3
- "activation_dropout": 0.1,
4
- "activation_function": "gelu",
5
- "add_bias_logits": false,
6
- "add_final_layer_norm": false,
7
- "architectures": [
8
- "BartForConditionalGeneration"
9
- ],
10
- "attention_dropout": 0.1,
11
- "bos_token_id": 0,
12
- "classif_dropout": 0.1,
13
- "classifier_dropout": 0.0,
14
- "d_model": 1024,
15
- "decoder_attention_heads": 16,
16
- "decoder_ffn_dim": 4096,
17
- "decoder_layerdrop": 0.0,
18
- "decoder_layers": 12,
19
- "decoder_start_token_id": 2,
20
- "dropout": 0.1,
21
- "early_stopping": null,
22
- "encoder_attention_heads": 16,
23
- "encoder_ffn_dim": 4096,
24
- "encoder_layerdrop": 0.0,
25
- "encoder_layers": 12,
26
- "eos_token_id": 2,
27
- "forced_eos_token_id": 2,
28
- "gradient_checkpointing": false,
29
- "id2label": {
30
- "0": "LABEL_0",
31
- "1": "LABEL_1",
32
- "2": "LABEL_2"
33
- },
34
- "init_std": 0.02,
35
- "is_encoder_decoder": true,
36
- "label2id": {
37
- "LABEL_0": 0,
38
- "LABEL_1": 1,
39
- "LABEL_2": 2
40
- },
41
- "max_position_embeddings": 1024,
42
- "model_type": "bart",
43
- "no_repeat_ngram_size": null,
44
- "normalize_before": false,
45
- "num_beams": null,
46
- "num_hidden_layers": 12,
47
- "pad_token_id": 1,
48
- "scale_embedding": false,
49
- "task_specific_params": {
50
- "summarization": {
51
- "length_penalty": 1.0,
52
- "max_length": 128,
53
- "min_length": 12,
54
- "num_beams": 4
55
- },
56
- "summarization_cnn": {
57
- "length_penalty": 2.0,
58
- "max_length": 142,
59
- "min_length": 56,
60
- "num_beams": 4
61
- },
62
- "summarization_xsum": {
63
- "length_penalty": 1.0,
64
- "max_length": 62,
65
- "min_length": 11,
66
- "num_beams": 6
67
- }
68
- },
69
- "torch_dtype": "float32",
70
- "transformers_version": "4.46.3",
71
- "use_cache": true,
72
- "vocab_size": 50265
73
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-1000/generation_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "decoder_start_token_id": 2,
5
- "early_stopping": true,
6
- "eos_token_id": 2,
7
- "forced_bos_token_id": 0,
8
- "forced_eos_token_id": 2,
9
- "no_repeat_ngram_size": 3,
10
- "num_beams": 4,
11
- "pad_token_id": 1,
12
- "transformers_version": "4.46.3"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-1000/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
bart_large_model/checkpoint-1000/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fb34c71395f3ba2e022f8811ce33ec67a3bb4b5eeb32a33ad66b1827303ed87
3
- size 1625426996
 
 
 
 
bart_large_model/checkpoint-1000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0bc12596207b6d7064e858aa4a72977aa63e5b82359b9a8b89c0cf3db30c154
3
- size 3250759951
 
 
 
 
bart_large_model/checkpoint-1000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1afcead05b669fd547e71d5c1780e33945da2c67977b5cb64f3f8ac1a24192c3
3
- size 14244
 
 
 
 
bart_large_model/checkpoint-1000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:323d47a6fc046ef5379714c1c193f6260caad1c603f0e6b6dc7e770d5c6605f6
3
- size 1064
 
 
 
 
bart_large_model/checkpoint-1000/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": true,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-1000/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": true,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": false,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 1024,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "BartTokenizer",
55
- "unk_token": "<unk>"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-1000/trainer_state.json DELETED
@@ -1,183 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "eval_steps": 500,
6
- "global_step": 1000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "grad_norm": 10.48393726348877,
14
- "learning_rate": 4.53e-05,
15
- "loss": 4.4604,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 1.0,
20
- "eval_loss": 2.9183404445648193,
21
- "eval_runtime": 1.8488,
22
- "eval_samples_per_second": 107.636,
23
- "eval_steps_per_second": 13.522,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 2.0,
28
- "grad_norm": 7.335023403167725,
29
- "learning_rate": 4.0300000000000004e-05,
30
- "loss": 2.7258,
31
- "step": 200
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_loss": 2.7304582595825195,
36
- "eval_runtime": 1.8116,
37
- "eval_samples_per_second": 109.846,
38
- "eval_steps_per_second": 13.8,
39
- "step": 200
40
- },
41
- {
42
- "epoch": 3.0,
43
- "grad_norm": 5.964653491973877,
44
- "learning_rate": 3.535e-05,
45
- "loss": 2.3864,
46
- "step": 300
47
- },
48
- {
49
- "epoch": 3.0,
50
- "eval_loss": 2.6989123821258545,
51
- "eval_runtime": 1.8114,
52
- "eval_samples_per_second": 109.86,
53
- "eval_steps_per_second": 13.801,
54
- "step": 300
55
- },
56
- {
57
- "epoch": 4.0,
58
- "grad_norm": 6.744424343109131,
59
- "learning_rate": 3.035e-05,
60
- "loss": 2.0862,
61
- "step": 400
62
- },
63
- {
64
- "epoch": 4.0,
65
- "eval_loss": 2.7234792709350586,
66
- "eval_runtime": 1.8082,
67
- "eval_samples_per_second": 110.057,
68
- "eval_steps_per_second": 13.826,
69
- "step": 400
70
- },
71
- {
72
- "epoch": 5.0,
73
- "grad_norm": 5.396308898925781,
74
- "learning_rate": 2.5350000000000003e-05,
75
- "loss": 1.8448,
76
- "step": 500
77
- },
78
- {
79
- "epoch": 5.0,
80
- "eval_loss": 2.7597484588623047,
81
- "eval_runtime": 1.8017,
82
- "eval_samples_per_second": 110.449,
83
- "eval_steps_per_second": 13.875,
84
- "step": 500
85
- },
86
- {
87
- "epoch": 6.0,
88
- "grad_norm": 6.125258445739746,
89
- "learning_rate": 2.035e-05,
90
- "loss": 1.6564,
91
- "step": 600
92
- },
93
- {
94
- "epoch": 6.0,
95
- "eval_loss": 2.7703540325164795,
96
- "eval_runtime": 1.799,
97
- "eval_samples_per_second": 110.619,
98
- "eval_steps_per_second": 13.897,
99
- "step": 600
100
- },
101
- {
102
- "epoch": 7.0,
103
- "grad_norm": 5.595690727233887,
104
- "learning_rate": 1.535e-05,
105
- "loss": 1.502,
106
- "step": 700
107
- },
108
- {
109
- "epoch": 7.0,
110
- "eval_loss": 2.848972797393799,
111
- "eval_runtime": 1.8013,
112
- "eval_samples_per_second": 110.478,
113
- "eval_steps_per_second": 13.879,
114
- "step": 700
115
- },
116
- {
117
- "epoch": 8.0,
118
- "grad_norm": 6.748431205749512,
119
- "learning_rate": 1.035e-05,
120
- "loss": 1.3629,
121
- "step": 800
122
- },
123
- {
124
- "epoch": 8.0,
125
- "eval_loss": 2.87552547454834,
126
- "eval_runtime": 1.8067,
127
- "eval_samples_per_second": 110.145,
128
- "eval_steps_per_second": 13.837,
129
- "step": 800
130
- },
131
- {
132
- "epoch": 9.0,
133
- "grad_norm": 5.841766834259033,
134
- "learning_rate": 5.3500000000000004e-06,
135
- "loss": 1.2656,
136
- "step": 900
137
- },
138
- {
139
- "epoch": 9.0,
140
- "eval_loss": 2.961149215698242,
141
- "eval_runtime": 1.8115,
142
- "eval_samples_per_second": 109.852,
143
- "eval_steps_per_second": 13.801,
144
- "step": 900
145
- },
146
- {
147
- "epoch": 10.0,
148
- "grad_norm": 6.789031505584717,
149
- "learning_rate": 3.5000000000000004e-07,
150
- "loss": 1.2044,
151
- "step": 1000
152
- },
153
- {
154
- "epoch": 10.0,
155
- "eval_loss": 2.985333204269409,
156
- "eval_runtime": 1.8076,
157
- "eval_samples_per_second": 110.09,
158
- "eval_steps_per_second": 13.83,
159
- "step": 1000
160
- }
161
- ],
162
- "logging_steps": 100,
163
- "max_steps": 1000,
164
- "num_input_tokens_seen": 0,
165
- "num_train_epochs": 10,
166
- "save_steps": 500,
167
- "stateful_callbacks": {
168
- "TrainerControl": {
169
- "args": {
170
- "should_epoch_stop": false,
171
- "should_evaluate": false,
172
- "should_log": false,
173
- "should_save": true,
174
- "should_training_stop": true
175
- },
176
- "attributes": {}
177
- }
178
- },
179
- "total_flos": 8625076316405760.0,
180
- "train_batch_size": 8,
181
- "trial_name": null,
182
- "trial_params": null
183
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-1000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09dc51aedc8b6a1721df5bf19a3d5e1df0000ee8526dd53976676358dd16f10a
3
- size 5368
 
 
 
 
bart_large_model/checkpoint-1000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
bart_large_model/checkpoint-900/config.json DELETED
@@ -1,73 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/bart-large",
3
- "activation_dropout": 0.1,
4
- "activation_function": "gelu",
5
- "add_bias_logits": false,
6
- "add_final_layer_norm": false,
7
- "architectures": [
8
- "BartForConditionalGeneration"
9
- ],
10
- "attention_dropout": 0.1,
11
- "bos_token_id": 0,
12
- "classif_dropout": 0.1,
13
- "classifier_dropout": 0.0,
14
- "d_model": 1024,
15
- "decoder_attention_heads": 16,
16
- "decoder_ffn_dim": 4096,
17
- "decoder_layerdrop": 0.0,
18
- "decoder_layers": 12,
19
- "decoder_start_token_id": 2,
20
- "dropout": 0.1,
21
- "early_stopping": null,
22
- "encoder_attention_heads": 16,
23
- "encoder_ffn_dim": 4096,
24
- "encoder_layerdrop": 0.0,
25
- "encoder_layers": 12,
26
- "eos_token_id": 2,
27
- "forced_eos_token_id": 2,
28
- "gradient_checkpointing": false,
29
- "id2label": {
30
- "0": "LABEL_0",
31
- "1": "LABEL_1",
32
- "2": "LABEL_2"
33
- },
34
- "init_std": 0.02,
35
- "is_encoder_decoder": true,
36
- "label2id": {
37
- "LABEL_0": 0,
38
- "LABEL_1": 1,
39
- "LABEL_2": 2
40
- },
41
- "max_position_embeddings": 1024,
42
- "model_type": "bart",
43
- "no_repeat_ngram_size": null,
44
- "normalize_before": false,
45
- "num_beams": null,
46
- "num_hidden_layers": 12,
47
- "pad_token_id": 1,
48
- "scale_embedding": false,
49
- "task_specific_params": {
50
- "summarization": {
51
- "length_penalty": 1.0,
52
- "max_length": 128,
53
- "min_length": 12,
54
- "num_beams": 4
55
- },
56
- "summarization_cnn": {
57
- "length_penalty": 2.0,
58
- "max_length": 142,
59
- "min_length": 56,
60
- "num_beams": 4
61
- },
62
- "summarization_xsum": {
63
- "length_penalty": 1.0,
64
- "max_length": 62,
65
- "min_length": 11,
66
- "num_beams": 6
67
- }
68
- },
69
- "torch_dtype": "float32",
70
- "transformers_version": "4.46.3",
71
- "use_cache": true,
72
- "vocab_size": 50265
73
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-900/generation_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "decoder_start_token_id": 2,
5
- "early_stopping": true,
6
- "eos_token_id": 2,
7
- "forced_bos_token_id": 0,
8
- "forced_eos_token_id": 2,
9
- "no_repeat_ngram_size": 3,
10
- "num_beams": 4,
11
- "pad_token_id": 1,
12
- "transformers_version": "4.46.3"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-900/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
bart_large_model/checkpoint-900/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6c17518e22b7a8ac10fcb78ee84c5929d95b52199d36d0471f6932e0c49956a
3
- size 1625426996
 
 
 
 
bart_large_model/checkpoint-900/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c9d7b56314e954a98d5f64456845ccbdecbf0e2260c2deda3ad21dc1f434ee
3
- size 3250759951
 
 
 
 
bart_large_model/checkpoint-900/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb198bc79ee70b136d286fe88644710725b231910d40d483901c1b9d41d48b2d
3
- size 14244
 
 
 
 
bart_large_model/checkpoint-900/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1ed6c4067a543a29be3a81d3b0a915ca274e37dea4d34f45e645224f5c179e0
3
- size 1064
 
 
 
 
bart_large_model/checkpoint-900/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": true,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-900/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": true,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": false,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 1024,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "BartTokenizer",
55
- "unk_token": "<unk>"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-900/trainer_state.json DELETED
@@ -1,168 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.0,
5
- "eval_steps": 500,
6
- "global_step": 900,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "grad_norm": 10.48393726348877,
14
- "learning_rate": 4.53e-05,
15
- "loss": 4.4604,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 1.0,
20
- "eval_loss": 2.9183404445648193,
21
- "eval_runtime": 1.8488,
22
- "eval_samples_per_second": 107.636,
23
- "eval_steps_per_second": 13.522,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 2.0,
28
- "grad_norm": 7.335023403167725,
29
- "learning_rate": 4.0300000000000004e-05,
30
- "loss": 2.7258,
31
- "step": 200
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_loss": 2.7304582595825195,
36
- "eval_runtime": 1.8116,
37
- "eval_samples_per_second": 109.846,
38
- "eval_steps_per_second": 13.8,
39
- "step": 200
40
- },
41
- {
42
- "epoch": 3.0,
43
- "grad_norm": 5.964653491973877,
44
- "learning_rate": 3.535e-05,
45
- "loss": 2.3864,
46
- "step": 300
47
- },
48
- {
49
- "epoch": 3.0,
50
- "eval_loss": 2.6989123821258545,
51
- "eval_runtime": 1.8114,
52
- "eval_samples_per_second": 109.86,
53
- "eval_steps_per_second": 13.801,
54
- "step": 300
55
- },
56
- {
57
- "epoch": 4.0,
58
- "grad_norm": 6.744424343109131,
59
- "learning_rate": 3.035e-05,
60
- "loss": 2.0862,
61
- "step": 400
62
- },
63
- {
64
- "epoch": 4.0,
65
- "eval_loss": 2.7234792709350586,
66
- "eval_runtime": 1.8082,
67
- "eval_samples_per_second": 110.057,
68
- "eval_steps_per_second": 13.826,
69
- "step": 400
70
- },
71
- {
72
- "epoch": 5.0,
73
- "grad_norm": 5.396308898925781,
74
- "learning_rate": 2.5350000000000003e-05,
75
- "loss": 1.8448,
76
- "step": 500
77
- },
78
- {
79
- "epoch": 5.0,
80
- "eval_loss": 2.7597484588623047,
81
- "eval_runtime": 1.8017,
82
- "eval_samples_per_second": 110.449,
83
- "eval_steps_per_second": 13.875,
84
- "step": 500
85
- },
86
- {
87
- "epoch": 6.0,
88
- "grad_norm": 6.125258445739746,
89
- "learning_rate": 2.035e-05,
90
- "loss": 1.6564,
91
- "step": 600
92
- },
93
- {
94
- "epoch": 6.0,
95
- "eval_loss": 2.7703540325164795,
96
- "eval_runtime": 1.799,
97
- "eval_samples_per_second": 110.619,
98
- "eval_steps_per_second": 13.897,
99
- "step": 600
100
- },
101
- {
102
- "epoch": 7.0,
103
- "grad_norm": 5.595690727233887,
104
- "learning_rate": 1.535e-05,
105
- "loss": 1.502,
106
- "step": 700
107
- },
108
- {
109
- "epoch": 7.0,
110
- "eval_loss": 2.848972797393799,
111
- "eval_runtime": 1.8013,
112
- "eval_samples_per_second": 110.478,
113
- "eval_steps_per_second": 13.879,
114
- "step": 700
115
- },
116
- {
117
- "epoch": 8.0,
118
- "grad_norm": 6.748431205749512,
119
- "learning_rate": 1.035e-05,
120
- "loss": 1.3629,
121
- "step": 800
122
- },
123
- {
124
- "epoch": 8.0,
125
- "eval_loss": 2.87552547454834,
126
- "eval_runtime": 1.8067,
127
- "eval_samples_per_second": 110.145,
128
- "eval_steps_per_second": 13.837,
129
- "step": 800
130
- },
131
- {
132
- "epoch": 9.0,
133
- "grad_norm": 5.841766834259033,
134
- "learning_rate": 5.3500000000000004e-06,
135
- "loss": 1.2656,
136
- "step": 900
137
- },
138
- {
139
- "epoch": 9.0,
140
- "eval_loss": 2.961149215698242,
141
- "eval_runtime": 1.8115,
142
- "eval_samples_per_second": 109.852,
143
- "eval_steps_per_second": 13.801,
144
- "step": 900
145
- }
146
- ],
147
- "logging_steps": 100,
148
- "max_steps": 1000,
149
- "num_input_tokens_seen": 0,
150
- "num_train_epochs": 10,
151
- "save_steps": 500,
152
- "stateful_callbacks": {
153
- "TrainerControl": {
154
- "args": {
155
- "should_epoch_stop": false,
156
- "should_evaluate": false,
157
- "should_log": false,
158
- "should_save": true,
159
- "should_training_stop": false
160
- },
161
- "attributes": {}
162
- }
163
- },
164
- "total_flos": 7762568684765184.0,
165
- "train_batch_size": 8,
166
- "trial_name": null,
167
- "trial_params": null
168
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/checkpoint-900/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09dc51aedc8b6a1721df5bf19a3d5e1df0000ee8526dd53976676358dd16f10a
3
- size 5368
 
 
 
 
bart_large_model/checkpoint-900/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
bart_large_model/config.json DELETED
@@ -1,73 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/bart-large",
3
- "activation_dropout": 0.1,
4
- "activation_function": "gelu",
5
- "add_bias_logits": false,
6
- "add_final_layer_norm": false,
7
- "architectures": [
8
- "BartForConditionalGeneration"
9
- ],
10
- "attention_dropout": 0.1,
11
- "bos_token_id": 0,
12
- "classif_dropout": 0.1,
13
- "classifier_dropout": 0.0,
14
- "d_model": 1024,
15
- "decoder_attention_heads": 16,
16
- "decoder_ffn_dim": 4096,
17
- "decoder_layerdrop": 0.0,
18
- "decoder_layers": 12,
19
- "decoder_start_token_id": 2,
20
- "dropout": 0.1,
21
- "early_stopping": null,
22
- "encoder_attention_heads": 16,
23
- "encoder_ffn_dim": 4096,
24
- "encoder_layerdrop": 0.0,
25
- "encoder_layers": 12,
26
- "eos_token_id": 2,
27
- "forced_eos_token_id": 2,
28
- "gradient_checkpointing": false,
29
- "id2label": {
30
- "0": "LABEL_0",
31
- "1": "LABEL_1",
32
- "2": "LABEL_2"
33
- },
34
- "init_std": 0.02,
35
- "is_encoder_decoder": true,
36
- "label2id": {
37
- "LABEL_0": 0,
38
- "LABEL_1": 1,
39
- "LABEL_2": 2
40
- },
41
- "max_position_embeddings": 1024,
42
- "model_type": "bart",
43
- "no_repeat_ngram_size": null,
44
- "normalize_before": false,
45
- "num_beams": null,
46
- "num_hidden_layers": 12,
47
- "pad_token_id": 1,
48
- "scale_embedding": false,
49
- "task_specific_params": {
50
- "summarization": {
51
- "length_penalty": 1.0,
52
- "max_length": 128,
53
- "min_length": 12,
54
- "num_beams": 4
55
- },
56
- "summarization_cnn": {
57
- "length_penalty": 2.0,
58
- "max_length": 142,
59
- "min_length": 56,
60
- "num_beams": 4
61
- },
62
- "summarization_xsum": {
63
- "length_penalty": 1.0,
64
- "max_length": 62,
65
- "min_length": 11,
66
- "num_beams": 6
67
- }
68
- },
69
- "torch_dtype": "float32",
70
- "transformers_version": "4.46.3",
71
- "use_cache": true,
72
- "vocab_size": 50265
73
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/generation_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "decoder_start_token_id": 2,
5
- "early_stopping": true,
6
- "eos_token_id": 2,
7
- "forced_bos_token_id": 0,
8
- "forced_eos_token_id": 2,
9
- "no_repeat_ngram_size": 3,
10
- "num_beams": 4,
11
- "pad_token_id": 1,
12
- "transformers_version": "4.46.3"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
bart_large_model/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fb34c71395f3ba2e022f8811ce33ec67a3bb4b5eeb32a33ad66b1827303ed87
3
- size 1625426996
 
 
 
 
bart_large_model/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": true,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": true,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": false,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 1024,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "BartTokenizer",
55
- "unk_token": "<unk>"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bart_large_model/vocab.json DELETED
The diff for this file is too large to render. See raw diff