intelia-lab commited on
Commit
2ddb5c0
·
verified ·
1 Parent(s): 85eede4

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mt5-base",
3
+ "architectures": [
4
+ "MT5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "gated-gelu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": true,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "mt5",
19
+ "num_decoder_layers": 12,
20
+ "num_heads": 12,
21
+ "num_layers": 12,
22
+ "output_past": true,
23
+ "pad_token_id": 0,
24
+ "relative_attention_max_distance": 128,
25
+ "relative_attention_num_buckets": 32,
26
+ "tie_word_embeddings": false,
27
+ "tokenizer_class": "T5Tokenizer",
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.23.0",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d82996cd4396bdbf3739380d7efebb95b16a02c7475082b2b54622750ed9c79
3
+ size 4659378441
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0219749288e7a9e48936ffb1a5e59a1f90bafee3f61845373188bc954b6ade
3
+ size 2329700173
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c446be1f8516743233b4e0b9846261662475a5cb4d5d45632a9d379845ad44ba
3
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3292d391378c15098fa3e719b4601bb844554b25583c651ba76a6e6925f33070
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ab6fe9ed1c8353b3bfcf922087786bade8c0ee4f7dfdeca1b424c725aaef39
3
+ size 16330466
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": null,
3
+ "eos_token": "</s>",
4
+ "extra_ids": 0,
5
+ "name_or_path": "google/mt5-base",
6
+ "pad_token": "<pad>",
7
+ "sp_model_kwargs": {},
8
+ "special_tokens_map_file": "/home/patrick/.cache/torch/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276",
9
+ "tokenizer_class": "T5Tokenizer",
10
+ "unk_token": "<unk>"
11
+ }
trainer_state.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 11.899416255051639,
5
+ "global_step": 26500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_bleu 1": 70.3337,
13
+ "eval_bleu 2": 60.798,
14
+ "eval_bleu 3": 49.7924,
15
+ "eval_bleu 4": 33.7923,
16
+ "eval_loss": 0.6441967487335205,
17
+ "eval_rouge1": 50.4785,
18
+ "eval_rouge2": 32.871,
19
+ "eval_rougeL": 50.0855,
20
+ "eval_rougeLsum": 50.0558,
21
+ "eval_runtime": 684.1071,
22
+ "eval_samples_per_second": 12.122,
23
+ "eval_score": 32.2794,
24
+ "eval_sim_cos": 0.7653,
25
+ "eval_steps_per_second": 3.032,
26
+ "step": 2227
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "eval_bleu 1": 70.3322,
31
+ "eval_bleu 2": 61.1932,
32
+ "eval_bleu 3": 50.7058,
33
+ "eval_bleu 4": 35.6325,
34
+ "eval_loss": 0.5984485745429993,
35
+ "eval_rouge1": 51.6586,
36
+ "eval_rouge2": 34.8166,
37
+ "eval_rougeL": 51.2221,
38
+ "eval_rougeLsum": 51.2004,
39
+ "eval_runtime": 689.9391,
40
+ "eval_samples_per_second": 12.02,
41
+ "eval_score": 33.6509,
42
+ "eval_sim_cos": 0.7702,
43
+ "eval_steps_per_second": 3.006,
44
+ "step": 4454
45
+ },
46
+ {
47
+ "epoch": 3.0,
48
+ "eval_bleu 1": 69.8987,
49
+ "eval_bleu 2": 60.7039,
50
+ "eval_bleu 3": 50.0866,
51
+ "eval_bleu 4": 35.1525,
52
+ "eval_loss": 0.579205334186554,
53
+ "eval_rouge1": 51.7058,
54
+ "eval_rouge2": 34.7654,
55
+ "eval_rougeL": 51.0676,
56
+ "eval_rougeLsum": 51.0349,
57
+ "eval_runtime": 700.0697,
58
+ "eval_samples_per_second": 11.846,
59
+ "eval_score": 34.5869,
60
+ "eval_sim_cos": 0.7717,
61
+ "eval_steps_per_second": 2.963,
62
+ "step": 6681
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "eval_bleu 1": 71.3187,
67
+ "eval_bleu 2": 62.1795,
68
+ "eval_bleu 3": 51.5483,
69
+ "eval_bleu 4": 36.2898,
70
+ "eval_loss": 0.5839173793792725,
71
+ "eval_rouge1": 52.2274,
72
+ "eval_rouge2": 35.3155,
73
+ "eval_rougeL": 51.6932,
74
+ "eval_rougeLsum": 51.6431,
75
+ "eval_runtime": 686.5612,
76
+ "eval_samples_per_second": 12.079,
77
+ "eval_score": 34.2209,
78
+ "eval_sim_cos": 0.7769,
79
+ "eval_steps_per_second": 3.021,
80
+ "step": 8908
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_bleu 1": 70.0029,
85
+ "eval_bleu 2": 60.8847,
86
+ "eval_bleu 3": 50.4903,
87
+ "eval_bleu 4": 35.8533,
88
+ "eval_loss": 0.5703114867210388,
89
+ "eval_rouge1": 52.2272,
90
+ "eval_rouge2": 35.5618,
91
+ "eval_rougeL": 51.643,
92
+ "eval_rougeLsum": 51.622,
93
+ "eval_runtime": 704.1871,
94
+ "eval_samples_per_second": 11.777,
95
+ "eval_score": 35.0827,
96
+ "eval_sim_cos": 0.7735,
97
+ "eval_steps_per_second": 2.945,
98
+ "step": 11135
99
+ },
100
+ {
101
+ "epoch": 6.0,
102
+ "eval_bleu 1": 69.3858,
103
+ "eval_bleu 2": 60.1128,
104
+ "eval_bleu 3": 49.6906,
105
+ "eval_bleu 4": 35.2287,
106
+ "eval_loss": 0.5710490345954895,
107
+ "eval_rouge1": 52.586,
108
+ "eval_rouge2": 35.9223,
109
+ "eval_rougeL": 51.8411,
110
+ "eval_rougeLsum": 51.8356,
111
+ "eval_runtime": 711.5517,
112
+ "eval_samples_per_second": 11.655,
113
+ "eval_score": 35.7035,
114
+ "eval_sim_cos": 0.7745,
115
+ "eval_steps_per_second": 2.915,
116
+ "step": 13362
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "eval_bleu 1": 69.9726,
121
+ "eval_bleu 2": 60.7494,
122
+ "eval_bleu 3": 50.2446,
123
+ "eval_bleu 4": 35.578,
124
+ "eval_loss": 0.5707941055297852,
125
+ "eval_rouge1": 52.6509,
126
+ "eval_rouge2": 36.0061,
127
+ "eval_rougeL": 51.9829,
128
+ "eval_rougeLsum": 51.9582,
129
+ "eval_runtime": 703.3177,
130
+ "eval_samples_per_second": 11.791,
131
+ "eval_score": 35.3903,
132
+ "eval_sim_cos": 0.776,
133
+ "eval_steps_per_second": 2.949,
134
+ "step": 15589
135
+ },
136
+ {
137
+ "epoch": 8.0,
138
+ "learning_rate": 1.666853764406526e-05,
139
+ "loss": 0.6526,
140
+ "step": 17815
141
+ },
142
+ {
143
+ "epoch": 8.0,
144
+ "eval_bleu 1": 70.0548,
145
+ "eval_bleu 2": 60.8059,
146
+ "eval_bleu 3": 50.2906,
147
+ "eval_bleu 4": 35.6541,
148
+ "eval_loss": 0.5693395137786865,
149
+ "eval_rouge1": 53.0005,
150
+ "eval_rouge2": 36.3939,
151
+ "eval_rougeL": 52.2168,
152
+ "eval_rougeLsum": 52.2032,
153
+ "eval_runtime": 709.1405,
154
+ "eval_samples_per_second": 11.694,
155
+ "eval_score": 35.9363,
156
+ "eval_sim_cos": 0.778,
157
+ "eval_steps_per_second": 2.925,
158
+ "step": 17816
159
+ },
160
+ {
161
+ "epoch": 9.0,
162
+ "eval_bleu 1": 69.7601,
163
+ "eval_bleu 2": 60.4978,
164
+ "eval_bleu 3": 50.0289,
165
+ "eval_bleu 4": 35.5284,
166
+ "eval_loss": 0.5695525407791138,
167
+ "eval_rouge1": 52.9499,
168
+ "eval_rouge2": 36.3434,
169
+ "eval_rougeL": 52.1838,
170
+ "eval_rougeLsum": 52.1727,
171
+ "eval_runtime": 713.0996,
172
+ "eval_samples_per_second": 11.63,
173
+ "eval_score": 36.1266,
174
+ "eval_sim_cos": 0.7772,
175
+ "eval_steps_per_second": 2.908,
176
+ "step": 20043
177
+ },
178
+ {
179
+ "epoch": 10.0,
180
+ "eval_bleu 1": 69.3518,
181
+ "eval_bleu 2": 60.0768,
182
+ "eval_bleu 3": 49.6026,
183
+ "eval_bleu 4": 35.2072,
184
+ "eval_loss": 0.5750804543495178,
185
+ "eval_rouge1": 52.9485,
186
+ "eval_rouge2": 36.4398,
187
+ "eval_rougeL": 52.1386,
188
+ "eval_rougeLsum": 52.1295,
189
+ "eval_runtime": 717.0443,
190
+ "eval_samples_per_second": 11.566,
191
+ "eval_score": 36.356,
192
+ "eval_sim_cos": 0.7769,
193
+ "eval_steps_per_second": 2.892,
194
+ "step": 22270
195
+ },
196
+ {
197
+ "epoch": 11.0,
198
+ "eval_bleu 1": 70.0005,
199
+ "eval_bleu 2": 60.7865,
200
+ "eval_bleu 3": 50.3355,
201
+ "eval_bleu 4": 35.8009,
202
+ "eval_loss": 0.5696647763252258,
203
+ "eval_rouge1": 53.1415,
204
+ "eval_rouge2": 36.5933,
205
+ "eval_rougeL": 52.3479,
206
+ "eval_rougeLsum": 52.3203,
207
+ "eval_runtime": 710.6735,
208
+ "eval_samples_per_second": 11.669,
209
+ "eval_score": 36.3183,
210
+ "eval_sim_cos": 0.779,
211
+ "eval_steps_per_second": 2.918,
212
+ "step": 24497
213
+ }
214
+ ],
215
+ "max_steps": 26724,
216
+ "num_train_epochs": 12,
217
+ "total_flos": 7.298085567419412e+17,
218
+ "trial_name": null,
219
+ "trial_params": null
220
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b411852394207194116e23291740be29563587e30acb0e547ee8802e37e96708
3
+ size 3567