sominw committed on
Commit
b2a99ae
·
1 Parent(s): 54984ce

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-large",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2816,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "gated-gelu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": true,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "t5",
19
+ "n_positions": 512,
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.24.0",
30
+ "use_cache": true,
31
+ "vocab_size": 32128
32
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc44e581ba73719abd0a47486d51b3a686f7494717a46ba298fe7e08bf6f85b1
3
+ size 6265534689
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24c54aa5e5e7497cf11879e2a24ec87cddaee8953fbc1fd725cf1ddce6eb40ec
3
+ size 3132789733
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c2ce990214420fe7049704ada0b1a328186ecc35d5ea760d6f073603ec600d
3
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec90150116dd5950bcc220ac4fdbf0fc692dde66fd1ed6ef8b2609f88f36a57c
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "extra_ids": 100,
106
+ "model_max_length": 512,
107
+ "name_or_path": "google/flan-t5-large",
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "special_tokens_map_file": "/home/younes_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-large/snapshots/314bc112b191ec17b625ba81438dc73d6c23659d/special_tokens_map.json",
111
+ "tokenizer_class": "T5Tokenizer",
112
+ "unk_token": "<unk>"
113
+ }
trainer_state.json ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.2085561497326203,
5
+ "global_step": 18000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.18,
12
+ "learning_rate": 2.9465240641711232e-05,
13
+ "loss": 0.249,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.18,
18
+ "eval_gen_len": 19.0,
19
+ "eval_loss": 0.14023509621620178,
20
+ "eval_rouge1": 0.3062,
21
+ "eval_rouge2": 0.2486,
22
+ "eval_rougeL": 0.3052,
23
+ "eval_rougeLsum": 0.3051,
24
+ "eval_runtime": 520.6566,
25
+ "eval_samples_per_second": 4.79,
26
+ "eval_steps_per_second": 1.198,
27
+ "step": 1000
28
+ },
29
+ {
30
+ "epoch": 0.36,
31
+ "learning_rate": 2.893048128342246e-05,
32
+ "loss": 0.1555,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.36,
37
+ "eval_gen_len": 19.0,
38
+ "eval_loss": 0.12116534262895584,
39
+ "eval_rouge1": 0.3159,
40
+ "eval_rouge2": 0.2601,
41
+ "eval_rougeL": 0.3151,
42
+ "eval_rougeLsum": 0.3151,
43
+ "eval_runtime": 520.8894,
44
+ "eval_samples_per_second": 4.788,
45
+ "eval_steps_per_second": 1.198,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 0.53,
50
+ "learning_rate": 2.8395721925133692e-05,
51
+ "loss": 0.1393,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.53,
56
+ "eval_gen_len": 19.0,
57
+ "eval_loss": 0.11260348558425903,
58
+ "eval_rouge1": 0.3198,
59
+ "eval_rouge2": 0.2646,
60
+ "eval_rougeL": 0.3195,
61
+ "eval_rougeLsum": 0.3193,
62
+ "eval_runtime": 521.5395,
63
+ "eval_samples_per_second": 4.782,
64
+ "eval_steps_per_second": 1.196,
65
+ "step": 3000
66
+ },
67
+ {
68
+ "epoch": 0.71,
69
+ "learning_rate": 2.786096256684492e-05,
70
+ "loss": 0.1344,
71
+ "step": 4000
72
+ },
73
+ {
74
+ "epoch": 0.71,
75
+ "eval_gen_len": 19.0,
76
+ "eval_loss": 0.10910908132791519,
77
+ "eval_rouge1": 0.3158,
78
+ "eval_rouge2": 0.2609,
79
+ "eval_rougeL": 0.3153,
80
+ "eval_rougeLsum": 0.3152,
81
+ "eval_runtime": 521.3922,
82
+ "eval_samples_per_second": 4.783,
83
+ "eval_steps_per_second": 1.197,
84
+ "step": 4000
85
+ },
86
+ {
87
+ "epoch": 0.89,
88
+ "learning_rate": 2.732620320855615e-05,
89
+ "loss": 0.1238,
90
+ "step": 5000
91
+ },
92
+ {
93
+ "epoch": 0.89,
94
+ "eval_gen_len": 19.0,
95
+ "eval_loss": 0.10584986209869385,
96
+ "eval_rouge1": 0.3219,
97
+ "eval_rouge2": 0.2669,
98
+ "eval_rougeL": 0.3216,
99
+ "eval_rougeLsum": 0.3217,
100
+ "eval_runtime": 521.9752,
101
+ "eval_samples_per_second": 4.778,
102
+ "eval_steps_per_second": 1.195,
103
+ "step": 5000
104
+ },
105
+ {
106
+ "epoch": 1.07,
107
+ "learning_rate": 2.679144385026738e-05,
108
+ "loss": 0.1161,
109
+ "step": 6000
110
+ },
111
+ {
112
+ "epoch": 1.07,
113
+ "eval_gen_len": 19.0,
114
+ "eval_loss": 0.10303913056850433,
115
+ "eval_rouge1": 0.323,
116
+ "eval_rouge2": 0.2677,
117
+ "eval_rougeL": 0.3225,
118
+ "eval_rougeLsum": 0.3224,
119
+ "eval_runtime": 520.4378,
120
+ "eval_samples_per_second": 4.792,
121
+ "eval_steps_per_second": 1.199,
122
+ "step": 6000
123
+ },
124
+ {
125
+ "epoch": 1.25,
126
+ "learning_rate": 2.625668449197861e-05,
127
+ "loss": 0.1068,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 1.25,
132
+ "eval_gen_len": 19.0,
133
+ "eval_loss": 0.10013392567634583,
134
+ "eval_rouge1": 0.3216,
135
+ "eval_rouge2": 0.2665,
136
+ "eval_rougeL": 0.3212,
137
+ "eval_rougeLsum": 0.3213,
138
+ "eval_runtime": 521.441,
139
+ "eval_samples_per_second": 4.783,
140
+ "eval_steps_per_second": 1.197,
141
+ "step": 7000
142
+ },
143
+ {
144
+ "epoch": 1.43,
145
+ "learning_rate": 2.572192513368984e-05,
146
+ "loss": 0.1084,
147
+ "step": 8000
148
+ },
149
+ {
150
+ "epoch": 1.43,
151
+ "eval_gen_len": 19.0,
152
+ "eval_loss": 0.09854520857334137,
153
+ "eval_rouge1": 0.3235,
154
+ "eval_rouge2": 0.2687,
155
+ "eval_rougeL": 0.323,
156
+ "eval_rougeLsum": 0.3229,
157
+ "eval_runtime": 523.6333,
158
+ "eval_samples_per_second": 4.763,
159
+ "eval_steps_per_second": 1.192,
160
+ "step": 8000
161
+ },
162
+ {
163
+ "epoch": 1.6,
164
+ "learning_rate": 2.518716577540107e-05,
165
+ "loss": 0.1015,
166
+ "step": 9000
167
+ },
168
+ {
169
+ "epoch": 1.6,
170
+ "eval_gen_len": 19.0,
171
+ "eval_loss": 0.0971846953034401,
172
+ "eval_rouge1": 0.3239,
173
+ "eval_rouge2": 0.2692,
174
+ "eval_rougeL": 0.3235,
175
+ "eval_rougeLsum": 0.3236,
176
+ "eval_runtime": 530.9604,
177
+ "eval_samples_per_second": 4.697,
178
+ "eval_steps_per_second": 1.175,
179
+ "step": 9000
180
+ },
181
+ {
182
+ "epoch": 1.78,
183
+ "learning_rate": 2.4652406417112303e-05,
184
+ "loss": 0.1026,
185
+ "step": 10000
186
+ },
187
+ {
188
+ "epoch": 1.78,
189
+ "eval_gen_len": 19.0,
190
+ "eval_loss": 0.09422960877418518,
191
+ "eval_rouge1": 0.3253,
192
+ "eval_rouge2": 0.2701,
193
+ "eval_rougeL": 0.3249,
194
+ "eval_rougeLsum": 0.3248,
195
+ "eval_runtime": 520.5989,
196
+ "eval_samples_per_second": 4.791,
197
+ "eval_steps_per_second": 1.199,
198
+ "step": 10000
199
+ },
200
+ {
201
+ "epoch": 1.96,
202
+ "learning_rate": 2.411764705882353e-05,
203
+ "loss": 0.1019,
204
+ "step": 11000
205
+ },
206
+ {
207
+ "epoch": 1.96,
208
+ "eval_gen_len": 19.0,
209
+ "eval_loss": 0.09265820682048798,
210
+ "eval_rouge1": 0.3261,
211
+ "eval_rouge2": 0.2715,
212
+ "eval_rougeL": 0.3258,
213
+ "eval_rougeLsum": 0.3258,
214
+ "eval_runtime": 521.7855,
215
+ "eval_samples_per_second": 4.78,
216
+ "eval_steps_per_second": 1.196,
217
+ "step": 11000
218
+ },
219
+ {
220
+ "epoch": 2.14,
221
+ "learning_rate": 2.3582887700534762e-05,
222
+ "loss": 0.0928,
223
+ "step": 12000
224
+ },
225
+ {
226
+ "epoch": 2.14,
227
+ "eval_gen_len": 19.0,
228
+ "eval_loss": 0.09256169199943542,
229
+ "eval_rouge1": 0.3262,
230
+ "eval_rouge2": 0.2716,
231
+ "eval_rougeL": 0.326,
232
+ "eval_rougeLsum": 0.3259,
233
+ "eval_runtime": 521.8224,
234
+ "eval_samples_per_second": 4.779,
235
+ "eval_steps_per_second": 1.196,
236
+ "step": 12000
237
+ },
238
+ {
239
+ "epoch": 2.32,
240
+ "learning_rate": 2.304812834224599e-05,
241
+ "loss": 0.0879,
242
+ "step": 13000
243
+ },
244
+ {
245
+ "epoch": 2.32,
246
+ "eval_gen_len": 19.0,
247
+ "eval_loss": 0.09265527129173279,
248
+ "eval_rouge1": 0.3267,
249
+ "eval_rouge2": 0.2722,
250
+ "eval_rougeL": 0.3264,
251
+ "eval_rougeLsum": 0.3264,
252
+ "eval_runtime": 521.1332,
253
+ "eval_samples_per_second": 4.786,
254
+ "eval_steps_per_second": 1.197,
255
+ "step": 13000
256
+ },
257
+ {
258
+ "epoch": 2.5,
259
+ "learning_rate": 2.2513368983957222e-05,
260
+ "loss": 0.0885,
261
+ "step": 14000
262
+ },
263
+ {
264
+ "epoch": 2.5,
265
+ "eval_gen_len": 19.0,
266
+ "eval_loss": 0.09181583672761917,
267
+ "eval_rouge1": 0.3269,
268
+ "eval_rouge2": 0.2727,
269
+ "eval_rougeL": 0.3266,
270
+ "eval_rougeLsum": 0.3265,
271
+ "eval_runtime": 521.7097,
272
+ "eval_samples_per_second": 4.78,
273
+ "eval_steps_per_second": 1.196,
274
+ "step": 14000
275
+ },
276
+ {
277
+ "epoch": 2.67,
278
+ "learning_rate": 2.197860962566845e-05,
279
+ "loss": 0.088,
280
+ "step": 15000
281
+ },
282
+ {
283
+ "epoch": 2.67,
284
+ "eval_gen_len": 19.0,
285
+ "eval_loss": 0.09152571111917496,
286
+ "eval_rouge1": 0.3249,
287
+ "eval_rouge2": 0.2702,
288
+ "eval_rougeL": 0.3245,
289
+ "eval_rougeLsum": 0.3245,
290
+ "eval_runtime": 523.0652,
291
+ "eval_samples_per_second": 4.768,
292
+ "eval_steps_per_second": 1.193,
293
+ "step": 15000
294
+ },
295
+ {
296
+ "epoch": 2.85,
297
+ "learning_rate": 2.144385026737968e-05,
298
+ "loss": 0.0879,
299
+ "step": 16000
300
+ },
301
+ {
302
+ "epoch": 2.85,
303
+ "eval_gen_len": 19.0,
304
+ "eval_loss": 0.09004141390323639,
305
+ "eval_rouge1": 0.3267,
306
+ "eval_rouge2": 0.2723,
307
+ "eval_rougeL": 0.3265,
308
+ "eval_rougeLsum": 0.3264,
309
+ "eval_runtime": 522.6782,
310
+ "eval_samples_per_second": 4.772,
311
+ "eval_steps_per_second": 1.194,
312
+ "step": 16000
313
+ },
314
+ {
315
+ "epoch": 3.03,
316
+ "learning_rate": 2.090909090909091e-05,
317
+ "loss": 0.0847,
318
+ "step": 17000
319
+ },
320
+ {
321
+ "epoch": 3.03,
322
+ "eval_gen_len": 19.0,
323
+ "eval_loss": 0.08994261920452118,
324
+ "eval_rouge1": 0.325,
325
+ "eval_rouge2": 0.2707,
326
+ "eval_rougeL": 0.3247,
327
+ "eval_rougeLsum": 0.3246,
328
+ "eval_runtime": 521.2871,
329
+ "eval_samples_per_second": 4.784,
330
+ "eval_steps_per_second": 1.197,
331
+ "step": 17000
332
+ },
333
+ {
334
+ "epoch": 3.21,
335
+ "learning_rate": 2.037433155080214e-05,
336
+ "loss": 0.0793,
337
+ "step": 18000
338
+ },
339
+ {
340
+ "epoch": 3.21,
341
+ "eval_gen_len": 19.0,
342
+ "eval_loss": 0.09187126904726028,
343
+ "eval_rouge1": 0.3276,
344
+ "eval_rouge2": 0.273,
345
+ "eval_rougeL": 0.3273,
346
+ "eval_rougeLsum": 0.3273,
347
+ "eval_runtime": 522.4505,
348
+ "eval_samples_per_second": 4.774,
349
+ "eval_steps_per_second": 1.194,
350
+ "step": 18000
351
+ }
352
+ ],
353
+ "max_steps": 56100,
354
+ "num_train_epochs": 10,
355
+ "total_flos": 3.915075106706227e+16,
356
+ "trial_name": null,
357
+ "trial_params": null
358
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74d2710f5bc9d552f22382ee95dc03eef906aa96cb3bdb69932e8ff7b0a634d8
3
+ size 3567