CNR-ILC committed on
Commit
de071fa
·
verified ·
1 Parent(s): 409eb8e

ILC-CNR/gs-aristoBERTo

Browse files
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: Jacobo/aristoBERTo
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: model-checkpoints
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # model-checkpoints
15
+
16
+ This model is a fine-tuned version of [Jacobo/aristoBERTo](https://huggingface.co/Jacobo/aristoBERTo) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 1.9964
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 5e-05
38
+ - train_batch_size: 16
39
+ - eval_batch_size: 16
40
+ - seed: 42
41
+ - optimizer: AdamW (adamw_torch) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
42
+ - lr_scheduler_type: linear
43
+ - num_epochs: 10
44
+ - mixed_precision_training: Native AMP
45
+
46
+ ### Training results
47
+
48
+ | Training Loss | Epoch | Step | Validation Loss |
49
+ |:-------------:|:-----:|:-----:|:---------------:|
50
+ | 2.9782 | 1.0 | 5617 | 2.5919 |
51
+ | 2.5711 | 2.0 | 11234 | 2.4118 |
52
+ | 2.4112 | 3.0 | 16851 | 2.3027 |
53
+ | 2.2997 | 4.0 | 22468 | 2.2044 |
54
+ | 2.2077 | 5.0 | 28085 | 2.1578 |
55
+ | 2.1414 | 6.0 | 33702 | 2.1075 |
56
+ | 2.0819 | 7.0 | 39319 | 2.0618 |
57
+ | 2.0388 | 8.0 | 44936 | 2.0305 |
58
+ | 2.0048 | 9.0 | 50553 | 2.0053 |
59
+ | 1.9773 | 10.0 | 56170 | 2.0138 |
60
+
61
+
62
+ ### Framework versions
63
+
64
+ - Transformers 4.50.3
65
+ - Pytorch 2.6.0+cu124
66
+ - Datasets 3.5.0
67
+ - Tokenizers 0.21.1
README.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: Jacobo/aristoBERTo
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: model-checkpoints
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # model-checkpoints
15
+
16
+ This model is a fine-tuned version of [Jacobo/aristoBERTo](https://huggingface.co/Jacobo/aristoBERTo) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 1.9793
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 5e-05
38
+ - train_batch_size: 16
39
+ - eval_batch_size: 16
40
+ - seed: 42
41
+ - optimizer: AdamW (adamw_torch) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
42
+ - lr_scheduler_type: linear
43
+ - num_epochs: 10
44
+ - mixed_precision_training: Native AMP
45
+
46
+ ### Training results
47
+
48
+ | Training Loss | Epoch | Step | Validation Loss |
49
+ |:-------------:|:-----:|:-----:|:---------------:|
50
+ | 2.9726 | 1.0 | 5652 | 2.5978 |
51
+ | 2.5639 | 2.0 | 11304 | 2.3816 |
52
+ | 2.4014 | 3.0 | 16956 | 2.2645 |
53
+ | 2.2988 | 4.0 | 22608 | 2.2166 |
54
+ | 2.2089 | 5.0 | 28260 | 2.1206 |
55
+ | 2.1371 | 6.0 | 33912 | 2.0921 |
56
+ | 2.0808 | 7.0 | 39564 | 2.0355 |
57
+ | 2.0361 | 8.0 | 45216 | 2.0193 |
58
+ | 2.001 | 9.0 | 50868 | 1.9988 |
59
+ | 1.977 | 10.0 | 56520 | 1.9922 |
60
+
61
+
62
+ ### Framework versions
63
+
64
+ - Transformers 4.51.3
65
+ - Pytorch 2.7.0+cu126
66
+ - Datasets 3.5.0
67
+ - Tokenizers 0.21.1
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.9793492555618286,
4
+ "eval_runtime": 19.7795,
5
+ "eval_samples_per_second": 491.923,
6
+ "eval_steps_per_second": 30.789,
7
+ "step": 56520,
8
+ "total_flos": 2.975422706417664e+16,
9
+ "train_loss": 2.2677627250309627,
10
+ "train_runtime": 6971.4542,
11
+ "train_samples_per_second": 129.738,
12
+ "train_steps_per_second": 8.107
13
+ }
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 12,
17
+ "output_past": true,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.51.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 35000
25
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.9793492555618286,
4
+ "eval_runtime": 19.7795,
5
+ "eval_samples_per_second": 491.923,
6
+ "eval_steps_per_second": 30.789
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac48d52adb26e2fddddb2ca2062b638e638322e6dcaa395cb7f33fd5e6f5338
3
+ size 451855232
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "step": 56520,
4
+ "total_flos": 2.975422706417664e+16,
5
+ "train_loss": 2.2677627250309627,
6
+ "train_runtime": 6971.4542,
7
+ "train_samples_per_second": 129.738,
8
+ "train_steps_per_second": 8.107
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 10.0,
6
+ "eval_steps": 500,
7
+ "global_step": 56520,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "grad_norm": 12.995927810668945,
15
+ "learning_rate": 4.50070771408351e-05,
16
+ "loss": 2.9726,
17
+ "step": 5652
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_loss": 2.5977725982666016,
22
+ "eval_runtime": 19.8579,
23
+ "eval_samples_per_second": 489.981,
24
+ "eval_steps_per_second": 30.668,
25
+ "step": 5652
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "grad_norm": 10.8749361038208,
30
+ "learning_rate": 4.000796178343949e-05,
31
+ "loss": 2.5639,
32
+ "step": 11304
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_loss": 2.381584405899048,
37
+ "eval_runtime": 19.7379,
38
+ "eval_samples_per_second": 492.959,
39
+ "eval_steps_per_second": 30.854,
40
+ "step": 11304
41
+ },
42
+ {
43
+ "epoch": 3.0,
44
+ "grad_norm": 11.009535789489746,
45
+ "learning_rate": 3.500973106864827e-05,
46
+ "loss": 2.4014,
47
+ "step": 16956
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "eval_loss": 2.2645437717437744,
52
+ "eval_runtime": 19.6521,
53
+ "eval_samples_per_second": 495.112,
54
+ "eval_steps_per_second": 30.989,
55
+ "step": 16956
56
+ },
57
+ {
58
+ "epoch": 4.0,
59
+ "grad_norm": 11.495499610900879,
60
+ "learning_rate": 3.0011500353857042e-05,
61
+ "loss": 2.2988,
62
+ "step": 22608
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "eval_loss": 2.2165863513946533,
67
+ "eval_runtime": 19.8532,
68
+ "eval_samples_per_second": 490.097,
69
+ "eval_steps_per_second": 30.675,
70
+ "step": 22608
71
+ },
72
+ {
73
+ "epoch": 5.0,
74
+ "grad_norm": 11.591689109802246,
75
+ "learning_rate": 2.501238499646143e-05,
76
+ "loss": 2.2089,
77
+ "step": 28260
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "eval_loss": 2.120560884475708,
82
+ "eval_runtime": 19.806,
83
+ "eval_samples_per_second": 491.265,
84
+ "eval_steps_per_second": 30.748,
85
+ "step": 28260
86
+ },
87
+ {
88
+ "epoch": 6.0,
89
+ "grad_norm": 10.091324806213379,
90
+ "learning_rate": 2.0015923566878984e-05,
91
+ "loss": 2.1371,
92
+ "step": 33912
93
+ },
94
+ {
95
+ "epoch": 6.0,
96
+ "eval_loss": 2.0921123027801514,
97
+ "eval_runtime": 19.6342,
98
+ "eval_samples_per_second": 495.564,
99
+ "eval_steps_per_second": 31.017,
100
+ "step": 33912
101
+ },
102
+ {
103
+ "epoch": 7.0,
104
+ "grad_norm": 9.590017318725586,
105
+ "learning_rate": 1.5016808209483369e-05,
106
+ "loss": 2.0808,
107
+ "step": 39564
108
+ },
109
+ {
110
+ "epoch": 7.0,
111
+ "eval_loss": 2.035478115081787,
112
+ "eval_runtime": 19.7938,
113
+ "eval_samples_per_second": 491.568,
114
+ "eval_steps_per_second": 30.767,
115
+ "step": 39564
116
+ },
117
+ {
118
+ "epoch": 8.0,
119
+ "grad_norm": 11.4172945022583,
120
+ "learning_rate": 1.0017692852087757e-05,
121
+ "loss": 2.0361,
122
+ "step": 45216
123
+ },
124
+ {
125
+ "epoch": 8.0,
126
+ "eval_loss": 2.019296169281006,
127
+ "eval_runtime": 19.7587,
128
+ "eval_samples_per_second": 492.442,
129
+ "eval_steps_per_second": 30.822,
130
+ "step": 45216
131
+ },
132
+ {
133
+ "epoch": 9.0,
134
+ "grad_norm": 13.36776351928711,
135
+ "learning_rate": 5.019462137296532e-06,
136
+ "loss": 2.001,
137
+ "step": 50868
138
+ },
139
+ {
140
+ "epoch": 9.0,
141
+ "eval_loss": 1.9987555742263794,
142
+ "eval_runtime": 19.4188,
143
+ "eval_samples_per_second": 501.06,
144
+ "eval_steps_per_second": 31.361,
145
+ "step": 50868
146
+ },
147
+ {
148
+ "epoch": 10.0,
149
+ "grad_norm": 11.329434394836426,
150
+ "learning_rate": 2.1231422505307857e-08,
151
+ "loss": 1.977,
152
+ "step": 56520
153
+ },
154
+ {
155
+ "epoch": 10.0,
156
+ "eval_loss": 1.992159366607666,
157
+ "eval_runtime": 19.7519,
158
+ "eval_samples_per_second": 492.612,
159
+ "eval_steps_per_second": 30.833,
160
+ "step": 56520
161
+ },
162
+ {
163
+ "epoch": 10.0,
164
+ "step": 56520,
165
+ "total_flos": 2.975422706417664e+16,
166
+ "train_loss": 2.2677627250309627,
167
+ "train_runtime": 6971.4542,
168
+ "train_samples_per_second": 129.738,
169
+ "train_steps_per_second": 8.107
170
+ },
171
+ {
172
+ "epoch": 10.0,
173
+ "eval_loss": 1.9793492555618286,
174
+ "eval_runtime": 19.7795,
175
+ "eval_samples_per_second": 491.923,
176
+ "eval_steps_per_second": 30.789,
177
+ "step": 56520
178
+ }
179
+ ],
180
+ "logging_steps": 500,
181
+ "max_steps": 56520,
182
+ "num_input_tokens_seen": 0,
183
+ "num_train_epochs": 10,
184
+ "save_steps": 500,
185
+ "stateful_callbacks": {
186
+ "TrainerControl": {
187
+ "args": {
188
+ "should_epoch_stop": false,
189
+ "should_evaluate": false,
190
+ "should_log": false,
191
+ "should_save": false,
192
+ "should_training_stop": false
193
+ },
194
+ "attributes": {}
195
+ }
196
+ },
197
+ "total_flos": 2.975422706417664e+16,
198
+ "train_batch_size": 16,
199
+ "trial_name": null,
200
+ "trial_params": null
201
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b67c2a3013a187a18e50cba2537a82dc555169447cfcc62169c9520a9e77c19
3
+ size 5713
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff