paride92 committed on
Commit
dcd6759
·
verified ·
1 Parent(s): 4dc285e

Upload Model

Browse files
Files changed (4) hide show
  1. config.json +29 -0
  2. pytorch_model.bin +3 -0
  3. tokenizer.model +3 -0
  4. trainig.txt +111 -0
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "is_decoder": false,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "bert",
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 12,
22
+ "pad_token_id": 0,
23
+ "problem_type": "single_label_classification",
24
+ "tie_word_embeddings": true,
25
+ "transformers_version": "5.0.0",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 32102
29
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1acbcb199d50bdd5acbecdefc592755d1391443590666bb188d5900a867faac
3
+ size 442872135
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873dbed1150738ae871995b5fc7ed04e7a3f5be0cb496c0219df4d3efa51e6d0
3
+ size 235127
trainig.txt ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config.json: 100%
2
+  433/433 [00:00<00:00, 53.7kB/s]
3
+ model.safetensors: 100%
4
+  445M/445M [00:04<00:00, 205MB/s]
5
+ Loading weights: 100%
6
+  199/199 [00:00<00:00, 974.29it/s, Materializing param=bert.pooler.dense.weight]
7
+ BertForSequenceClassification LOAD REPORT from: dbmdz/bert-base-italian-xxl-cased
8
+ Key | Status |
9
+ -------------------------------------------+------------+-
10
+ cls.seq_relationship.weight | UNEXPECTED |
11
+ cls.predictions.bias | UNEXPECTED |
12
+ cls.predictions.transform.dense.bias | UNEXPECTED |
13
+ cls.seq_relationship.bias | UNEXPECTED |
14
+ cls.predictions.transform.LayerNorm.bias | UNEXPECTED |
15
+ cls.predictions.transform.LayerNorm.weight | UNEXPECTED |
16
+ cls.predictions.transform.dense.weight | UNEXPECTED |
17
+ classifier.bias | MISSING |
18
+ classifier.weight | MISSING |
19
+
20
+ Notes:
21
+ - UNEXPECTED :can be ignored when loading from different task/architecture; not ok if you expect identical arch.
22
+ - MISSING :those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
23
+
24
+ ======== Epoch 1 / 3 ========
25
+ Training...
26
+ Batch 40 of 378. Elapsed: 0:00:19.
27
+ Batch 80 of 378. Elapsed: 0:00:38.
28
+ Batch 120 of 378. Elapsed: 0:00:56.
29
+ Batch 160 of 378. Elapsed: 0:01:14.
30
+ Batch 200 of 378. Elapsed: 0:01:33.
31
+ Batch 240 of 378. Elapsed: 0:01:51.
32
+ Batch 280 of 378. Elapsed: 0:02:09.
33
+ Batch 320 of 378. Elapsed: 0:02:28.
34
+ Batch 360 of 378. Elapsed: 0:02:46.
35
+
36
+ Average training loss: 0.39
37
+ Training took: 0:02:54
38
+
39
+ Running Validation...
40
+
41
+ Average test loss: 0.36
42
+ Validation took: 0:00:15
43
+ precision recall f1-score support
44
+
45
+ 0 0.80 0.93 0.86 2823
46
+ 1 0.90 0.71 0.79 2351
47
+
48
+ accuracy 0.83 5174
49
+ macro avg 0.85 0.82 0.83 5174
50
+ weighted avg 0.84 0.83 0.83 5174
51
+
52
+
53
+ ======== Epoch 2 / 3 ========
54
+ Training...
55
+ Batch 40 of 378. Elapsed: 0:00:18.
56
+ Batch 80 of 378. Elapsed: 0:00:36.
57
+ Batch 120 of 378. Elapsed: 0:00:55.
58
+ Batch 160 of 378. Elapsed: 0:01:13.
59
+ Batch 200 of 378. Elapsed: 0:01:31.
60
+ Batch 240 of 378. Elapsed: 0:01:50.
61
+ Batch 280 of 378. Elapsed: 0:02:08.
62
+ Batch 320 of 378. Elapsed: 0:02:26.
63
+ Batch 360 of 378. Elapsed: 0:02:45.
64
+
65
+ Average training loss: 0.20
66
+ Training took: 0:02:53
67
+
68
+ Running Validation...
69
+
70
+ Average test loss: 0.41
71
+ Validation took: 0:00:15
72
+ precision recall f1-score support
73
+
74
+ 0 0.82 0.91 0.87 2823
75
+ 1 0.88 0.77 0.82 2351
76
+
77
+ accuracy 0.85 5174
78
+ macro avg 0.85 0.84 0.84 5174
79
+ weighted avg 0.85 0.85 0.85 5174
80
+
81
+
82
+ ======== Epoch 3 / 3 ========
83
+ Training...
84
+ Batch 40 of 378. Elapsed: 0:00:18.
85
+ Batch 80 of 378. Elapsed: 0:00:36.
86
+ Batch 120 of 378. Elapsed: 0:00:55.
87
+ Batch 160 of 378. Elapsed: 0:01:13.
88
+ Batch 200 of 378. Elapsed: 0:01:31.
89
+ Batch 240 of 378. Elapsed: 0:01:50.
90
+ Batch 280 of 378. Elapsed: 0:02:08.
91
+ Batch 320 of 378. Elapsed: 0:02:26.
92
+ Batch 360 of 378. Elapsed: 0:02:45.
93
+
94
+ Average training loss: 0.07
95
+ Training took: 0:02:53
96
+
97
+ Running Validation...
98
+
99
+ Average test loss: 0.60
100
+ Validation took: 0:00:15
101
+ precision recall f1-score support
102
+
103
+ 0 0.86 0.89 0.88 2823
104
+ 1 0.87 0.83 0.85 2351
105
+
106
+ accuracy 0.86 5174
107
+ macro avg 0.86 0.86 0.86 5174
108
+ weighted avg 0.86 0.86 0.86 5174
109
+
110
+
111
+ Training complete!