mllm-dev committed on
Commit
d55a197
·
verified ·
1 Parent(s): 0ed3b2b

Upload folder using huggingface_hub

Browse files
checkpoint-403/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2-medium",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1024,
16
+ "n_head": 16,
17
+ "n_inner": null,
18
+ "n_layer": 24,
19
+ "n_positions": 1024,
20
+ "n_special": 0,
21
+ "predict_special_tokens": true,
22
+ "reorder_and_upcast_attn": false,
23
+ "resid_pdrop": 0.1,
24
+ "scale_attn_by_inverse_layer_idx": false,
25
+ "scale_attn_weights": true,
26
+ "summary_activation": null,
27
+ "summary_first_dropout": 0.1,
28
+ "summary_proj_to_labels": true,
29
+ "summary_type": "cls_index",
30
+ "summary_use_proj": true,
31
+ "task_specific_params": {
32
+ "text-generation": {
33
+ "do_sample": true,
34
+ "max_length": 50
35
+ }
36
+ },
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.40.0.dev0",
39
+ "use_cache": true,
40
+ "vocab_size": 50257
41
+ }
checkpoint-403/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "transformers_version": "4.40.0.dev0"
5
+ }
checkpoint-403/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-403/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6677586b06caa8116c0858cb629c1afd1743bc6c5418cb4c6ec4839af8e45a2
3
+ size 1419322880
checkpoint-403/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2f024b67ca73030cb662d639029d283e6366fe135d515fe45584ca38c18e6fc
3
+ size 2838828805
checkpoint-403/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:859567dfc90efb5895df92a3351a46cb20c6c4279c46cf528b18cf31aa2ceb97
3
+ size 14575
checkpoint-403/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e53420901bd8fc6ae2f9f5d65b3201d84b3bc7144089baddc4325dd2c807f195
3
+ size 627
checkpoint-403/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
checkpoint-403/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-403/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "padding_side": "left",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
checkpoint-403/trainer_state.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.03105354309082,
3
+ "best_model_checkpoint": "bill_sum_finetune_test_gpt2_medium/checkpoint-403",
4
+ "epoch": 13.0,
5
+ "eval_steps": 500,
6
+ "global_step": 403,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 600.0,
14
+ "eval_loss": 6.400322914123535,
15
+ "eval_rouge1": 0.3998,
16
+ "eval_rouge2": 0.1672,
17
+ "eval_rougeL": 0.2188,
18
+ "eval_rougeLsum": 0.3449,
19
+ "eval_runtime": 34.2204,
20
+ "eval_samples_per_second": 7.247,
21
+ "eval_steps_per_second": 0.234,
22
+ "step": 31
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 600.0,
27
+ "eval_loss": 6.197171688079834,
28
+ "eval_rouge1": 0.4092,
29
+ "eval_rouge2": 0.1715,
30
+ "eval_rougeL": 0.2241,
31
+ "eval_rougeLsum": 0.3526,
32
+ "eval_runtime": 34.6438,
33
+ "eval_samples_per_second": 7.159,
34
+ "eval_steps_per_second": 0.231,
35
+ "step": 62
36
+ },
37
+ {
38
+ "epoch": 3.0,
39
+ "eval_gen_len": 600.0,
40
+ "eval_loss": 6.118707656860352,
41
+ "eval_rouge1": 0.3997,
42
+ "eval_rouge2": 0.1673,
43
+ "eval_rougeL": 0.2187,
44
+ "eval_rougeLsum": 0.3447,
45
+ "eval_runtime": 34.107,
46
+ "eval_samples_per_second": 7.271,
47
+ "eval_steps_per_second": 0.235,
48
+ "step": 93
49
+ },
50
+ {
51
+ "epoch": 4.0,
52
+ "eval_gen_len": 600.0,
53
+ "eval_loss": 6.087508201599121,
54
+ "eval_rouge1": 0.4,
55
+ "eval_rouge2": 0.1676,
56
+ "eval_rougeL": 0.219,
57
+ "eval_rougeLsum": 0.3451,
58
+ "eval_runtime": 34.99,
59
+ "eval_samples_per_second": 7.088,
60
+ "eval_steps_per_second": 0.229,
61
+ "step": 124
62
+ },
63
+ {
64
+ "epoch": 5.0,
65
+ "eval_gen_len": 600.0,
66
+ "eval_loss": 6.069467544555664,
67
+ "eval_rouge1": 0.3999,
68
+ "eval_rouge2": 0.1674,
69
+ "eval_rougeL": 0.2189,
70
+ "eval_rougeLsum": 0.3449,
71
+ "eval_runtime": 33.9661,
72
+ "eval_samples_per_second": 7.301,
73
+ "eval_steps_per_second": 0.236,
74
+ "step": 155
75
+ },
76
+ {
77
+ "epoch": 6.0,
78
+ "eval_gen_len": 600.0,
79
+ "eval_loss": 6.058210372924805,
80
+ "eval_rouge1": 0.3997,
81
+ "eval_rouge2": 0.1674,
82
+ "eval_rougeL": 0.2188,
83
+ "eval_rougeLsum": 0.3446,
84
+ "eval_runtime": 34.8589,
85
+ "eval_samples_per_second": 7.114,
86
+ "eval_steps_per_second": 0.229,
87
+ "step": 186
88
+ },
89
+ {
90
+ "epoch": 7.0,
91
+ "eval_gen_len": 600.0,
92
+ "eval_loss": 6.04926872253418,
93
+ "eval_rouge1": 0.3998,
94
+ "eval_rouge2": 0.1673,
95
+ "eval_rougeL": 0.2188,
96
+ "eval_rougeLsum": 0.3449,
97
+ "eval_runtime": 34.0483,
98
+ "eval_samples_per_second": 7.284,
99
+ "eval_steps_per_second": 0.235,
100
+ "step": 217
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_gen_len": 600.0,
105
+ "eval_loss": 6.043769836425781,
106
+ "eval_rouge1": 0.403,
107
+ "eval_rouge2": 0.1689,
108
+ "eval_rougeL": 0.2205,
109
+ "eval_rougeLsum": 0.3473,
110
+ "eval_runtime": 34.9445,
111
+ "eval_samples_per_second": 7.097,
112
+ "eval_steps_per_second": 0.229,
113
+ "step": 248
114
+ },
115
+ {
116
+ "epoch": 9.0,
117
+ "eval_gen_len": 600.0,
118
+ "eval_loss": 6.037980079650879,
119
+ "eval_rouge1": 0.401,
120
+ "eval_rouge2": 0.168,
121
+ "eval_rougeL": 0.2195,
122
+ "eval_rougeLsum": 0.3458,
123
+ "eval_runtime": 33.8278,
124
+ "eval_samples_per_second": 7.331,
125
+ "eval_steps_per_second": 0.236,
126
+ "step": 279
127
+ },
128
+ {
129
+ "epoch": 10.0,
130
+ "eval_gen_len": 600.0,
131
+ "eval_loss": 6.0378899574279785,
132
+ "eval_rouge1": 0.4082,
133
+ "eval_rouge2": 0.1713,
134
+ "eval_rougeL": 0.2236,
135
+ "eval_rougeLsum": 0.3519,
136
+ "eval_runtime": 34.7389,
137
+ "eval_samples_per_second": 7.139,
138
+ "eval_steps_per_second": 0.23,
139
+ "step": 310
140
+ },
141
+ {
142
+ "epoch": 11.0,
143
+ "eval_gen_len": 600.0,
144
+ "eval_loss": 6.032817840576172,
145
+ "eval_rouge1": 0.4137,
146
+ "eval_rouge2": 0.1739,
147
+ "eval_rougeL": 0.2265,
148
+ "eval_rougeLsum": 0.3568,
149
+ "eval_runtime": 33.5201,
150
+ "eval_samples_per_second": 7.399,
151
+ "eval_steps_per_second": 0.239,
152
+ "step": 341
153
+ },
154
+ {
155
+ "epoch": 12.0,
156
+ "eval_gen_len": 600.0,
157
+ "eval_loss": 6.0318193435668945,
158
+ "eval_rouge1": 0.4081,
159
+ "eval_rouge2": 0.1714,
160
+ "eval_rougeL": 0.2235,
161
+ "eval_rougeLsum": 0.3518,
162
+ "eval_runtime": 34.3788,
163
+ "eval_samples_per_second": 7.214,
164
+ "eval_steps_per_second": 0.233,
165
+ "step": 372
166
+ },
167
+ {
168
+ "epoch": 13.0,
169
+ "eval_gen_len": 600.0,
170
+ "eval_loss": 6.03105354309082,
171
+ "eval_rouge1": 0.4106,
172
+ "eval_rouge2": 0.1726,
173
+ "eval_rougeL": 0.2248,
174
+ "eval_rougeLsum": 0.3538,
175
+ "eval_runtime": 33.8705,
176
+ "eval_samples_per_second": 7.322,
177
+ "eval_steps_per_second": 0.236,
178
+ "step": 403
179
+ }
180
+ ],
181
+ "logging_steps": 500,
182
+ "max_steps": 465,
183
+ "num_input_tokens_seen": 0,
184
+ "num_train_epochs": 15,
185
+ "save_steps": 500,
186
+ "total_flos": 1.1940304829546496e+16,
187
+ "train_batch_size": 32,
188
+ "trial_name": null,
189
+ "trial_params": null
190
+ }
checkpoint-403/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25e337903f3db105039bfdbe2ca6169f75ee9aee0e777c78637927868fc1a25d
3
+ size 4795
checkpoint-403/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-434/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2-medium",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1024,
16
+ "n_head": 16,
17
+ "n_inner": null,
18
+ "n_layer": 24,
19
+ "n_positions": 1024,
20
+ "n_special": 0,
21
+ "predict_special_tokens": true,
22
+ "reorder_and_upcast_attn": false,
23
+ "resid_pdrop": 0.1,
24
+ "scale_attn_by_inverse_layer_idx": false,
25
+ "scale_attn_weights": true,
26
+ "summary_activation": null,
27
+ "summary_first_dropout": 0.1,
28
+ "summary_proj_to_labels": true,
29
+ "summary_type": "cls_index",
30
+ "summary_use_proj": true,
31
+ "task_specific_params": {
32
+ "text-generation": {
33
+ "do_sample": true,
34
+ "max_length": 50
35
+ }
36
+ },
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.40.0.dev0",
39
+ "use_cache": true,
40
+ "vocab_size": 50257
41
+ }
checkpoint-434/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "transformers_version": "4.40.0.dev0"
5
+ }
checkpoint-434/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-434/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c115c97455c03f1fc2c60184a35cccbab03dfe2c106ce29fd359b1ad041ff7f1
3
+ size 1419322880
checkpoint-434/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2063f3f162db26b85267dd9bd95cf70ef26677de5ca1d86c7221953b2264b84
3
+ size 2838828805
checkpoint-434/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee493312f46c3967a15c76b4a74f9783c75c3d24f52ca266806a853e0858e8e3
3
+ size 14575
checkpoint-434/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fae830b96f317a47898f92e72b31dfbd0f9a3afdc17b4b00aee4a32463e25cab
3
+ size 627
checkpoint-434/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
checkpoint-434/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-434/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "padding_side": "left",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
checkpoint-434/trainer_state.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.03105354309082,
3
+ "best_model_checkpoint": "bill_sum_finetune_test_gpt2_medium/checkpoint-403",
4
+ "epoch": 14.0,
5
+ "eval_steps": 500,
6
+ "global_step": 434,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 600.0,
14
+ "eval_loss": 6.400322914123535,
15
+ "eval_rouge1": 0.3998,
16
+ "eval_rouge2": 0.1672,
17
+ "eval_rougeL": 0.2188,
18
+ "eval_rougeLsum": 0.3449,
19
+ "eval_runtime": 34.2204,
20
+ "eval_samples_per_second": 7.247,
21
+ "eval_steps_per_second": 0.234,
22
+ "step": 31
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 600.0,
27
+ "eval_loss": 6.197171688079834,
28
+ "eval_rouge1": 0.4092,
29
+ "eval_rouge2": 0.1715,
30
+ "eval_rougeL": 0.2241,
31
+ "eval_rougeLsum": 0.3526,
32
+ "eval_runtime": 34.6438,
33
+ "eval_samples_per_second": 7.159,
34
+ "eval_steps_per_second": 0.231,
35
+ "step": 62
36
+ },
37
+ {
38
+ "epoch": 3.0,
39
+ "eval_gen_len": 600.0,
40
+ "eval_loss": 6.118707656860352,
41
+ "eval_rouge1": 0.3997,
42
+ "eval_rouge2": 0.1673,
43
+ "eval_rougeL": 0.2187,
44
+ "eval_rougeLsum": 0.3447,
45
+ "eval_runtime": 34.107,
46
+ "eval_samples_per_second": 7.271,
47
+ "eval_steps_per_second": 0.235,
48
+ "step": 93
49
+ },
50
+ {
51
+ "epoch": 4.0,
52
+ "eval_gen_len": 600.0,
53
+ "eval_loss": 6.087508201599121,
54
+ "eval_rouge1": 0.4,
55
+ "eval_rouge2": 0.1676,
56
+ "eval_rougeL": 0.219,
57
+ "eval_rougeLsum": 0.3451,
58
+ "eval_runtime": 34.99,
59
+ "eval_samples_per_second": 7.088,
60
+ "eval_steps_per_second": 0.229,
61
+ "step": 124
62
+ },
63
+ {
64
+ "epoch": 5.0,
65
+ "eval_gen_len": 600.0,
66
+ "eval_loss": 6.069467544555664,
67
+ "eval_rouge1": 0.3999,
68
+ "eval_rouge2": 0.1674,
69
+ "eval_rougeL": 0.2189,
70
+ "eval_rougeLsum": 0.3449,
71
+ "eval_runtime": 33.9661,
72
+ "eval_samples_per_second": 7.301,
73
+ "eval_steps_per_second": 0.236,
74
+ "step": 155
75
+ },
76
+ {
77
+ "epoch": 6.0,
78
+ "eval_gen_len": 600.0,
79
+ "eval_loss": 6.058210372924805,
80
+ "eval_rouge1": 0.3997,
81
+ "eval_rouge2": 0.1674,
82
+ "eval_rougeL": 0.2188,
83
+ "eval_rougeLsum": 0.3446,
84
+ "eval_runtime": 34.8589,
85
+ "eval_samples_per_second": 7.114,
86
+ "eval_steps_per_second": 0.229,
87
+ "step": 186
88
+ },
89
+ {
90
+ "epoch": 7.0,
91
+ "eval_gen_len": 600.0,
92
+ "eval_loss": 6.04926872253418,
93
+ "eval_rouge1": 0.3998,
94
+ "eval_rouge2": 0.1673,
95
+ "eval_rougeL": 0.2188,
96
+ "eval_rougeLsum": 0.3449,
97
+ "eval_runtime": 34.0483,
98
+ "eval_samples_per_second": 7.284,
99
+ "eval_steps_per_second": 0.235,
100
+ "step": 217
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_gen_len": 600.0,
105
+ "eval_loss": 6.043769836425781,
106
+ "eval_rouge1": 0.403,
107
+ "eval_rouge2": 0.1689,
108
+ "eval_rougeL": 0.2205,
109
+ "eval_rougeLsum": 0.3473,
110
+ "eval_runtime": 34.9445,
111
+ "eval_samples_per_second": 7.097,
112
+ "eval_steps_per_second": 0.229,
113
+ "step": 248
114
+ },
115
+ {
116
+ "epoch": 9.0,
117
+ "eval_gen_len": 600.0,
118
+ "eval_loss": 6.037980079650879,
119
+ "eval_rouge1": 0.401,
120
+ "eval_rouge2": 0.168,
121
+ "eval_rougeL": 0.2195,
122
+ "eval_rougeLsum": 0.3458,
123
+ "eval_runtime": 33.8278,
124
+ "eval_samples_per_second": 7.331,
125
+ "eval_steps_per_second": 0.236,
126
+ "step": 279
127
+ },
128
+ {
129
+ "epoch": 10.0,
130
+ "eval_gen_len": 600.0,
131
+ "eval_loss": 6.0378899574279785,
132
+ "eval_rouge1": 0.4082,
133
+ "eval_rouge2": 0.1713,
134
+ "eval_rougeL": 0.2236,
135
+ "eval_rougeLsum": 0.3519,
136
+ "eval_runtime": 34.7389,
137
+ "eval_samples_per_second": 7.139,
138
+ "eval_steps_per_second": 0.23,
139
+ "step": 310
140
+ },
141
+ {
142
+ "epoch": 11.0,
143
+ "eval_gen_len": 600.0,
144
+ "eval_loss": 6.032817840576172,
145
+ "eval_rouge1": 0.4137,
146
+ "eval_rouge2": 0.1739,
147
+ "eval_rougeL": 0.2265,
148
+ "eval_rougeLsum": 0.3568,
149
+ "eval_runtime": 33.5201,
150
+ "eval_samples_per_second": 7.399,
151
+ "eval_steps_per_second": 0.239,
152
+ "step": 341
153
+ },
154
+ {
155
+ "epoch": 12.0,
156
+ "eval_gen_len": 600.0,
157
+ "eval_loss": 6.0318193435668945,
158
+ "eval_rouge1": 0.4081,
159
+ "eval_rouge2": 0.1714,
160
+ "eval_rougeL": 0.2235,
161
+ "eval_rougeLsum": 0.3518,
162
+ "eval_runtime": 34.3788,
163
+ "eval_samples_per_second": 7.214,
164
+ "eval_steps_per_second": 0.233,
165
+ "step": 372
166
+ },
167
+ {
168
+ "epoch": 13.0,
169
+ "eval_gen_len": 600.0,
170
+ "eval_loss": 6.03105354309082,
171
+ "eval_rouge1": 0.4106,
172
+ "eval_rouge2": 0.1726,
173
+ "eval_rougeL": 0.2248,
174
+ "eval_rougeLsum": 0.3538,
175
+ "eval_runtime": 33.8705,
176
+ "eval_samples_per_second": 7.322,
177
+ "eval_steps_per_second": 0.236,
178
+ "step": 403
179
+ },
180
+ {
181
+ "epoch": 14.0,
182
+ "eval_gen_len": 600.0,
183
+ "eval_loss": 6.0320916175842285,
184
+ "eval_rouge1": 0.4058,
185
+ "eval_rouge2": 0.1703,
186
+ "eval_rougeL": 0.2222,
187
+ "eval_rougeLsum": 0.3499,
188
+ "eval_runtime": 34.9479,
189
+ "eval_samples_per_second": 7.096,
190
+ "eval_steps_per_second": 0.229,
191
+ "step": 434
192
+ }
193
+ ],
194
+ "logging_steps": 500,
195
+ "max_steps": 465,
196
+ "num_input_tokens_seen": 0,
197
+ "num_train_epochs": 15,
198
+ "save_steps": 500,
199
+ "total_flos": 1.2858789816434688e+16,
200
+ "train_batch_size": 32,
201
+ "trial_name": null,
202
+ "trial_params": null
203
+ }
checkpoint-434/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25e337903f3db105039bfdbe2ca6169f75ee9aee0e777c78637927868fc1a25d
3
+ size 4795
checkpoint-434/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-465/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2-medium",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1024,
16
+ "n_head": 16,
17
+ "n_inner": null,
18
+ "n_layer": 24,
19
+ "n_positions": 1024,
20
+ "n_special": 0,
21
+ "predict_special_tokens": true,
22
+ "reorder_and_upcast_attn": false,
23
+ "resid_pdrop": 0.1,
24
+ "scale_attn_by_inverse_layer_idx": false,
25
+ "scale_attn_weights": true,
26
+ "summary_activation": null,
27
+ "summary_first_dropout": 0.1,
28
+ "summary_proj_to_labels": true,
29
+ "summary_type": "cls_index",
30
+ "summary_use_proj": true,
31
+ "task_specific_params": {
32
+ "text-generation": {
33
+ "do_sample": true,
34
+ "max_length": 50
35
+ }
36
+ },
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.40.0.dev0",
39
+ "use_cache": true,
40
+ "vocab_size": 50257
41
+ }
checkpoint-465/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "transformers_version": "4.40.0.dev0"
5
+ }
checkpoint-465/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-465/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1dc8c0157f1e2a6ac5c004e658f82145c37c071ccc1549be1d2e93d789ca4e
3
+ size 1419322880
checkpoint-465/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dce0549a57d24d4488e711dee8fcf62fd663c36e09784eb94d2466ccd334b57
3
+ size 2838828805
checkpoint-465/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ff0a861cbcb9cbff78d05825eccd58ca3194a9fea9d4dac6b88d21269872df
3
+ size 14575
checkpoint-465/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d53cce32fd95970b98716713e2bd16e09d58048535724d407d2f39907fb0e36
3
+ size 627
checkpoint-465/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
checkpoint-465/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-465/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "padding_side": "left",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
checkpoint-465/trainer_state.json ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.031024932861328,
3
+ "best_model_checkpoint": "bill_sum_finetune_test_gpt2_medium/checkpoint-465",
4
+ "epoch": 15.0,
5
+ "eval_steps": 500,
6
+ "global_step": 465,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 600.0,
14
+ "eval_loss": 6.400322914123535,
15
+ "eval_rouge1": 0.3998,
16
+ "eval_rouge2": 0.1672,
17
+ "eval_rougeL": 0.2188,
18
+ "eval_rougeLsum": 0.3449,
19
+ "eval_runtime": 34.2204,
20
+ "eval_samples_per_second": 7.247,
21
+ "eval_steps_per_second": 0.234,
22
+ "step": 31
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 600.0,
27
+ "eval_loss": 6.197171688079834,
28
+ "eval_rouge1": 0.4092,
29
+ "eval_rouge2": 0.1715,
30
+ "eval_rougeL": 0.2241,
31
+ "eval_rougeLsum": 0.3526,
32
+ "eval_runtime": 34.6438,
33
+ "eval_samples_per_second": 7.159,
34
+ "eval_steps_per_second": 0.231,
35
+ "step": 62
36
+ },
37
+ {
38
+ "epoch": 3.0,
39
+ "eval_gen_len": 600.0,
40
+ "eval_loss": 6.118707656860352,
41
+ "eval_rouge1": 0.3997,
42
+ "eval_rouge2": 0.1673,
43
+ "eval_rougeL": 0.2187,
44
+ "eval_rougeLsum": 0.3447,
45
+ "eval_runtime": 34.107,
46
+ "eval_samples_per_second": 7.271,
47
+ "eval_steps_per_second": 0.235,
48
+ "step": 93
49
+ },
50
+ {
51
+ "epoch": 4.0,
52
+ "eval_gen_len": 600.0,
53
+ "eval_loss": 6.087508201599121,
54
+ "eval_rouge1": 0.4,
55
+ "eval_rouge2": 0.1676,
56
+ "eval_rougeL": 0.219,
57
+ "eval_rougeLsum": 0.3451,
58
+ "eval_runtime": 34.99,
59
+ "eval_samples_per_second": 7.088,
60
+ "eval_steps_per_second": 0.229,
61
+ "step": 124
62
+ },
63
+ {
64
+ "epoch": 5.0,
65
+ "eval_gen_len": 600.0,
66
+ "eval_loss": 6.069467544555664,
67
+ "eval_rouge1": 0.3999,
68
+ "eval_rouge2": 0.1674,
69
+ "eval_rougeL": 0.2189,
70
+ "eval_rougeLsum": 0.3449,
71
+ "eval_runtime": 33.9661,
72
+ "eval_samples_per_second": 7.301,
73
+ "eval_steps_per_second": 0.236,
74
+ "step": 155
75
+ },
76
+ {
77
+ "epoch": 6.0,
78
+ "eval_gen_len": 600.0,
79
+ "eval_loss": 6.058210372924805,
80
+ "eval_rouge1": 0.3997,
81
+ "eval_rouge2": 0.1674,
82
+ "eval_rougeL": 0.2188,
83
+ "eval_rougeLsum": 0.3446,
84
+ "eval_runtime": 34.8589,
85
+ "eval_samples_per_second": 7.114,
86
+ "eval_steps_per_second": 0.229,
87
+ "step": 186
88
+ },
89
+ {
90
+ "epoch": 7.0,
91
+ "eval_gen_len": 600.0,
92
+ "eval_loss": 6.04926872253418,
93
+ "eval_rouge1": 0.3998,
94
+ "eval_rouge2": 0.1673,
95
+ "eval_rougeL": 0.2188,
96
+ "eval_rougeLsum": 0.3449,
97
+ "eval_runtime": 34.0483,
98
+ "eval_samples_per_second": 7.284,
99
+ "eval_steps_per_second": 0.235,
100
+ "step": 217
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_gen_len": 600.0,
105
+ "eval_loss": 6.043769836425781,
106
+ "eval_rouge1": 0.403,
107
+ "eval_rouge2": 0.1689,
108
+ "eval_rougeL": 0.2205,
109
+ "eval_rougeLsum": 0.3473,
110
+ "eval_runtime": 34.9445,
111
+ "eval_samples_per_second": 7.097,
112
+ "eval_steps_per_second": 0.229,
113
+ "step": 248
114
+ },
115
+ {
116
+ "epoch": 9.0,
117
+ "eval_gen_len": 600.0,
118
+ "eval_loss": 6.037980079650879,
119
+ "eval_rouge1": 0.401,
120
+ "eval_rouge2": 0.168,
121
+ "eval_rougeL": 0.2195,
122
+ "eval_rougeLsum": 0.3458,
123
+ "eval_runtime": 33.8278,
124
+ "eval_samples_per_second": 7.331,
125
+ "eval_steps_per_second": 0.236,
126
+ "step": 279
127
+ },
128
+ {
129
+ "epoch": 10.0,
130
+ "eval_gen_len": 600.0,
131
+ "eval_loss": 6.0378899574279785,
132
+ "eval_rouge1": 0.4082,
133
+ "eval_rouge2": 0.1713,
134
+ "eval_rougeL": 0.2236,
135
+ "eval_rougeLsum": 0.3519,
136
+ "eval_runtime": 34.7389,
137
+ "eval_samples_per_second": 7.139,
138
+ "eval_steps_per_second": 0.23,
139
+ "step": 310
140
+ },
141
+ {
142
+ "epoch": 11.0,
143
+ "eval_gen_len": 600.0,
144
+ "eval_loss": 6.032817840576172,
145
+ "eval_rouge1": 0.4137,
146
+ "eval_rouge2": 0.1739,
147
+ "eval_rougeL": 0.2265,
148
+ "eval_rougeLsum": 0.3568,
149
+ "eval_runtime": 33.5201,
150
+ "eval_samples_per_second": 7.399,
151
+ "eval_steps_per_second": 0.239,
152
+ "step": 341
153
+ },
154
+ {
155
+ "epoch": 12.0,
156
+ "eval_gen_len": 600.0,
157
+ "eval_loss": 6.0318193435668945,
158
+ "eval_rouge1": 0.4081,
159
+ "eval_rouge2": 0.1714,
160
+ "eval_rougeL": 0.2235,
161
+ "eval_rougeLsum": 0.3518,
162
+ "eval_runtime": 34.3788,
163
+ "eval_samples_per_second": 7.214,
164
+ "eval_steps_per_second": 0.233,
165
+ "step": 372
166
+ },
167
+ {
168
+ "epoch": 13.0,
169
+ "eval_gen_len": 600.0,
170
+ "eval_loss": 6.03105354309082,
171
+ "eval_rouge1": 0.4106,
172
+ "eval_rouge2": 0.1726,
173
+ "eval_rougeL": 0.2248,
174
+ "eval_rougeLsum": 0.3538,
175
+ "eval_runtime": 33.8705,
176
+ "eval_samples_per_second": 7.322,
177
+ "eval_steps_per_second": 0.236,
178
+ "step": 403
179
+ },
180
+ {
181
+ "epoch": 14.0,
182
+ "eval_gen_len": 600.0,
183
+ "eval_loss": 6.0320916175842285,
184
+ "eval_rouge1": 0.4058,
185
+ "eval_rouge2": 0.1703,
186
+ "eval_rougeL": 0.2222,
187
+ "eval_rougeLsum": 0.3499,
188
+ "eval_runtime": 34.9479,
189
+ "eval_samples_per_second": 7.096,
190
+ "eval_steps_per_second": 0.229,
191
+ "step": 434
192
+ },
193
+ {
194
+ "epoch": 15.0,
195
+ "eval_gen_len": 600.0,
196
+ "eval_loss": 6.031024932861328,
197
+ "eval_rouge1": 0.4132,
198
+ "eval_rouge2": 0.1737,
199
+ "eval_rougeL": 0.2263,
200
+ "eval_rougeLsum": 0.3561,
201
+ "eval_runtime": 33.388,
202
+ "eval_samples_per_second": 7.428,
203
+ "eval_steps_per_second": 0.24,
204
+ "step": 465
205
+ }
206
+ ],
207
+ "logging_steps": 500,
208
+ "max_steps": 465,
209
+ "num_input_tokens_seen": 0,
210
+ "num_train_epochs": 15,
211
+ "save_steps": 500,
212
+ "total_flos": 1.377727480332288e+16,
213
+ "train_batch_size": 32,
214
+ "trial_name": null,
215
+ "trial_params": null
216
+ }
checkpoint-465/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25e337903f3db105039bfdbe2ca6169f75ee9aee0e777c78637927868fc1a25d
3
+ size 4795
checkpoint-465/vocab.json ADDED
The diff for this file is too large to render. See raw diff