Upload roberta-base model files
Browse filesRoBERTa model and tokenizer files for intent detection
- checkpoint-best/config.json +55 -0
- checkpoint-best/model.safetensors +3 -0
- checkpoint-best/optimizer.pt +3 -0
- checkpoint-best/scheduler.pt +3 -0
- checkpoint-best/training_0.bin +3 -0
- checkpoint-best/training_1.bin +3 -0
- checkpoint-best/training_2.bin +3 -0
- checkpoint-last/config.json +55 -0
- checkpoint-last/idx_file.txt +1 -0
- checkpoint-last/model.safetensors +3 -0
- checkpoint-last/optimizer.pt +3 -0
- checkpoint-last/scheduler.pt +3 -0
- checkpoint-last/step_file.txt +1 -0
- config.json +55 -0
- eval_results.txt +180 -0
- model.safetensors +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +17 -0
- training_args.bin +3 -0
checkpoint-best/config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2",
|
| 18 |
+
"3": "LABEL_3",
|
| 19 |
+
"4": "LABEL_4",
|
| 20 |
+
"5": "LABEL_5",
|
| 21 |
+
"6": "LABEL_6",
|
| 22 |
+
"7": "LABEL_7",
|
| 23 |
+
"8": "LABEL_8",
|
| 24 |
+
"9": "LABEL_9",
|
| 25 |
+
"10": "LABEL_10"
|
| 26 |
+
},
|
| 27 |
+
"initializer_range": 0.02,
|
| 28 |
+
"intermediate_size": 3072,
|
| 29 |
+
"is_decoder": false,
|
| 30 |
+
"label2id": {
|
| 31 |
+
"LABEL_0": 0,
|
| 32 |
+
"LABEL_1": 1,
|
| 33 |
+
"LABEL_10": 10,
|
| 34 |
+
"LABEL_2": 2,
|
| 35 |
+
"LABEL_3": 3,
|
| 36 |
+
"LABEL_4": 4,
|
| 37 |
+
"LABEL_5": 5,
|
| 38 |
+
"LABEL_6": 6,
|
| 39 |
+
"LABEL_7": 7,
|
| 40 |
+
"LABEL_8": 8,
|
| 41 |
+
"LABEL_9": 9
|
| 42 |
+
},
|
| 43 |
+
"layer_norm_eps": 1e-05,
|
| 44 |
+
"max_position_embeddings": 514,
|
| 45 |
+
"model_type": "roberta",
|
| 46 |
+
"num_attention_heads": 12,
|
| 47 |
+
"num_hidden_layers": 12,
|
| 48 |
+
"pad_token_id": 1,
|
| 49 |
+
"problem_type": "single_label_classification",
|
| 50 |
+
"tie_word_embeddings": true,
|
| 51 |
+
"transformers_version": "5.0.0",
|
| 52 |
+
"type_vocab_size": 1,
|
| 53 |
+
"use_cache": true,
|
| 54 |
+
"vocab_size": 50265
|
| 55 |
+
}
|
checkpoint-best/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b43d88cca0c56d2e97971d49889aca66230ee20e1141466767aaffe1a58abf9
|
| 3 |
+
size 498640484
|
checkpoint-best/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0e925cd459dc9c3390463ce62bdbe8d4ccd21a108e81e138831374ef3547e91
|
| 3 |
+
size 997401227
|
checkpoint-best/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49877f883a73acf0e98bcd1698c8a54677f7577b5fc7c5b0d23842207458f607
|
| 3 |
+
size 1465
|
checkpoint-best/training_0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58ec65133ed17b44a22639b276d219fc64ee2e270abde49ad236332f6ad42d36
|
| 3 |
+
size 2751
|
checkpoint-best/training_1.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87cac335688338d7f585fb42522a1563c6cc2d84e0e3e0765c7836264f10754f
|
| 3 |
+
size 2751
|
checkpoint-best/training_2.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:121ab9fc2a9417288737e79067f80ac928ad8aa1d7d04506f7f2501e4944c3d1
|
| 3 |
+
size 2751
|
checkpoint-last/config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2",
|
| 18 |
+
"3": "LABEL_3",
|
| 19 |
+
"4": "LABEL_4",
|
| 20 |
+
"5": "LABEL_5",
|
| 21 |
+
"6": "LABEL_6",
|
| 22 |
+
"7": "LABEL_7",
|
| 23 |
+
"8": "LABEL_8",
|
| 24 |
+
"9": "LABEL_9",
|
| 25 |
+
"10": "LABEL_10"
|
| 26 |
+
},
|
| 27 |
+
"initializer_range": 0.02,
|
| 28 |
+
"intermediate_size": 3072,
|
| 29 |
+
"is_decoder": false,
|
| 30 |
+
"label2id": {
|
| 31 |
+
"LABEL_0": 0,
|
| 32 |
+
"LABEL_1": 1,
|
| 33 |
+
"LABEL_10": 10,
|
| 34 |
+
"LABEL_2": 2,
|
| 35 |
+
"LABEL_3": 3,
|
| 36 |
+
"LABEL_4": 4,
|
| 37 |
+
"LABEL_5": 5,
|
| 38 |
+
"LABEL_6": 6,
|
| 39 |
+
"LABEL_7": 7,
|
| 40 |
+
"LABEL_8": 8,
|
| 41 |
+
"LABEL_9": 9
|
| 42 |
+
},
|
| 43 |
+
"layer_norm_eps": 1e-05,
|
| 44 |
+
"max_position_embeddings": 514,
|
| 45 |
+
"model_type": "roberta",
|
| 46 |
+
"num_attention_heads": 12,
|
| 47 |
+
"num_hidden_layers": 12,
|
| 48 |
+
"pad_token_id": 1,
|
| 49 |
+
"problem_type": "single_label_classification",
|
| 50 |
+
"tie_word_embeddings": true,
|
| 51 |
+
"transformers_version": "5.0.0",
|
| 52 |
+
"type_vocab_size": 1,
|
| 53 |
+
"use_cache": true,
|
| 54 |
+
"vocab_size": 50265
|
| 55 |
+
}
|
checkpoint-last/idx_file.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
9
|
checkpoint-last/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76ffdd1acf85d3e695fc365682b9e38b0d4ff61f1c531df5e0579b4792fd8dbf
|
| 3 |
+
size 498640484
|
checkpoint-last/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2237ea8ca40adf381486668ae9b107472d6bf61d51806ae36c23f50b5970250
|
| 3 |
+
size 997401227
|
checkpoint-last/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:855783c4a305fdbf382f9b519098423354c83b4be8827dd8a14eb9d04ab16548
|
| 3 |
+
size 1465
|
checkpoint-last/step_file.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
8470
|
config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2",
|
| 18 |
+
"3": "LABEL_3",
|
| 19 |
+
"4": "LABEL_4",
|
| 20 |
+
"5": "LABEL_5",
|
| 21 |
+
"6": "LABEL_6",
|
| 22 |
+
"7": "LABEL_7",
|
| 23 |
+
"8": "LABEL_8",
|
| 24 |
+
"9": "LABEL_9",
|
| 25 |
+
"10": "LABEL_10"
|
| 26 |
+
},
|
| 27 |
+
"initializer_range": 0.02,
|
| 28 |
+
"intermediate_size": 3072,
|
| 29 |
+
"is_decoder": false,
|
| 30 |
+
"label2id": {
|
| 31 |
+
"LABEL_0": 0,
|
| 32 |
+
"LABEL_1": 1,
|
| 33 |
+
"LABEL_10": 10,
|
| 34 |
+
"LABEL_2": 2,
|
| 35 |
+
"LABEL_3": 3,
|
| 36 |
+
"LABEL_4": 4,
|
| 37 |
+
"LABEL_5": 5,
|
| 38 |
+
"LABEL_6": 6,
|
| 39 |
+
"LABEL_7": 7,
|
| 40 |
+
"LABEL_8": 8,
|
| 41 |
+
"LABEL_9": 9
|
| 42 |
+
},
|
| 43 |
+
"layer_norm_eps": 1e-05,
|
| 44 |
+
"max_position_embeddings": 514,
|
| 45 |
+
"model_type": "roberta",
|
| 46 |
+
"num_attention_heads": 12,
|
| 47 |
+
"num_hidden_layers": 12,
|
| 48 |
+
"pad_token_id": 1,
|
| 49 |
+
"problem_type": "single_label_classification",
|
| 50 |
+
"tie_word_embeddings": true,
|
| 51 |
+
"transformers_version": "5.0.0",
|
| 52 |
+
"type_vocab_size": 1,
|
| 53 |
+
"use_cache": true,
|
| 54 |
+
"vocab_size": 50265
|
| 55 |
+
}
|
eval_results.txt
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
evaluate 0
|
| 2 |
+
acc = 0.7303735787763942
|
| 3 |
+
acc_and_f1_macro = 0.6964044280843411
|
| 4 |
+
acc_and_f1_weighted = 0.7272904268085286
|
| 5 |
+
class_f1 = [np.float64(0.16326530612244897), np.float64(0.7260579064587973), np.float64(0.7904509283819627), np.float64(0.42553191489361697), np.float64(0.5753424657534246), np.float64(0.5092592592592593), np.float64(0.9316770186335404), np.float64(0.8947368421052632), np.float64(0.9455958549222798)]
|
| 6 |
+
class_p = [np.float64(0.5714285714285714), np.float64(0.6877637130801688), np.float64(0.8010752688172043), np.float64(0.42168674698795183), np.float64(0.48554913294797686), np.float64(0.6547619047619048), np.float64(0.9259259259259259), np.float64(0.8854166666666666), np.float64(0.9530026109660574)]
|
| 7 |
+
class_r = [np.float64(0.09523809523809523), np.float64(0.7688679245283019), np.float64(0.7801047120418848), np.float64(0.4294478527607362), np.float64(0.7058823529411765), np.float64(0.4166666666666667), np.float64(0.9375), np.float64(0.9042553191489362), np.float64(0.9383033419023136)]
|
| 8 |
+
confusion_matrix = [[ 4 8 0 16 4 10 0 0 0]
|
| 9 |
+
[ 0 163 21 0 18 10 0 0 0]
|
| 10 |
+
[ 0 34 149 2 6 0 0 0 0]
|
| 11 |
+
[ 3 12 6 70 62 10 0 0 0]
|
| 12 |
+
[ 0 10 6 26 168 28 0 0 0]
|
| 13 |
+
[ 0 10 4 52 88 110 0 0 0]
|
| 14 |
+
[ 0 0 0 0 0 0 150 8 2]
|
| 15 |
+
[ 0 0 0 0 0 0 2 170 16]
|
| 16 |
+
[ 0 0 0 0 0 0 10 14 365]]
|
| 17 |
+
f1_macro = 0.6624352773922881
|
| 18 |
+
f1_weighted = 0.724207274840663
|
| 19 |
+
evaluate 1
|
| 20 |
+
acc = 0.7563616675690309
|
| 21 |
+
acc_and_f1_macro = 0.7330873058383998
|
| 22 |
+
acc_and_f1_weighted = 0.7565365056216706
|
| 23 |
+
class_f1 = [np.float64(0.3855421686746988), np.float64(0.7427293064876959), np.float64(0.8), np.float64(0.49390243902439024), np.float64(0.5932203389830509), np.float64(0.6704761904761904), np.float64(0.8813559322033899), np.float64(0.8802228412256267), np.float64(0.9408672798948752)]
|
| 24 |
+
class_p = [np.float64(0.3902439024390244), np.float64(0.7063829787234043), np.float64(0.8390804597701149), np.float64(0.4909090909090909), np.float64(0.5982905982905983), np.float64(0.6743295019157088), np.float64(0.8041237113402062), np.float64(0.9239766081871345), np.float64(0.9623655913978495)]
|
| 25 |
+
class_r = [np.float64(0.38095238095238093), np.float64(0.7830188679245284), np.float64(0.7643979057591623), np.float64(0.49693251533742333), np.float64(0.5882352941176471), np.float64(0.6666666666666666), np.float64(0.975), np.float64(0.8404255319148937), np.float64(0.9203084832904884)]
|
| 26 |
+
confusion_matrix = [[ 16 6 0 8 2 10 0 0 0]
|
| 27 |
+
[ 4 166 12 0 16 14 0 0 0]
|
| 28 |
+
[ 0 37 146 2 6 0 0 0 0]
|
| 29 |
+
[ 9 8 8 81 34 23 0 0 0]
|
| 30 |
+
[ 2 8 4 46 140 38 0 0 0]
|
| 31 |
+
[ 10 10 4 28 36 176 0 0 0]
|
| 32 |
+
[ 0 0 0 0 0 0 156 0 4]
|
| 33 |
+
[ 0 0 0 0 0 0 20 158 10]
|
| 34 |
+
[ 0 0 0 0 0 0 18 13 358]]
|
| 35 |
+
f1_macro = 0.7098129441077687
|
| 36 |
+
f1_weighted = 0.7567113436743105
|
| 37 |
+
evaluate 2
|
| 38 |
+
acc = 0.7698971304818625
|
| 39 |
+
acc_and_f1_macro = 0.7509621830084688
|
| 40 |
+
acc_and_f1_weighted = 0.7699789106634821
|
| 41 |
+
class_f1 = [np.float64(0.47058823529411764), np.float64(0.733644859813084), np.float64(0.8126649076517151), np.float64(0.49025069637883006), np.float64(0.6057906458797327), np.float64(0.707635009310987), np.float64(0.9447852760736196), np.float64(0.8924731182795699), np.float64(0.9304123711340206)]
|
| 42 |
+
class_p = [np.float64(0.6153846153846154), np.float64(0.7268518518518519), np.float64(0.8191489361702128), np.float64(0.4489795918367347), np.float64(0.6445497630331753), np.float64(0.6959706959706959), np.float64(0.927710843373494), np.float64(0.9021739130434783), np.float64(0.9328165374677002)]
|
| 43 |
+
class_r = [np.float64(0.38095238095238093), np.float64(0.7405660377358491), np.float64(0.806282722513089), np.float64(0.5398773006134969), np.float64(0.5714285714285714), np.float64(0.7196969696969697), np.float64(0.9625), np.float64(0.8829787234042553), np.float64(0.9280205655526992)]
|
| 44 |
+
confusion_matrix = [[ 16 4 0 12 2 8 0 0 0]
|
| 45 |
+
[ 2 157 17 4 20 12 0 0 0]
|
| 46 |
+
[ 0 27 154 2 8 0 0 0 0]
|
| 47 |
+
[ 2 9 9 88 26 29 0 0 0]
|
| 48 |
+
[ 2 8 4 54 136 34 0 0 0]
|
| 49 |
+
[ 4 11 4 36 19 190 0 0 0]
|
| 50 |
+
[ 0 0 0 0 0 0 154 0 6]
|
| 51 |
+
[ 0 0 0 0 0 0 2 166 20]
|
| 52 |
+
[ 0 0 0 0 0 0 10 18 361]]
|
| 53 |
+
f1_macro = 0.7320272355350752
|
| 54 |
+
f1_weighted = 0.7700606908451018
|
| 55 |
+
evaluate 3
|
| 56 |
+
acc = 0.7574445046020574
|
| 57 |
+
acc_and_f1_macro = 0.7289343823474163
|
| 58 |
+
acc_and_f1_weighted = 0.7556290940626524
|
| 59 |
+
class_f1 = [np.float64(0.2962962962962963), np.float64(0.7600950118764846), np.float64(0.8042328042328042), np.float64(0.4280936454849498), np.float64(0.6113207547169812), np.float64(0.6468401486988847), np.float64(0.9325153374233127), np.float64(0.8947368421052632), np.float64(0.9296874999999999)]
|
| 60 |
+
class_p = [np.float64(0.6666666666666666), np.float64(0.7655502392344498), np.float64(0.8128342245989305), np.float64(0.47058823529411764), np.float64(0.5547945205479452), np.float64(0.635036496350365), np.float64(0.9156626506024096), np.float64(0.8854166666666666), np.float64(0.941952506596306)]
|
| 61 |
+
class_r = [np.float64(0.19047619047619047), np.float64(0.7547169811320755), np.float64(0.7958115183246073), np.float64(0.39263803680981596), np.float64(0.680672268907563), np.float64(0.6590909090909091), np.float64(0.95), np.float64(0.9042553191489362), np.float64(0.9177377892030848)]
|
| 62 |
+
confusion_matrix = [[ 8 4 0 12 4 14 0 0 0]
|
| 63 |
+
[ 0 160 16 2 20 14 0 0 0]
|
| 64 |
+
[ 0 27 152 4 8 0 0 0 0]
|
| 65 |
+
[ 2 5 9 64 47 36 0 0 0]
|
| 66 |
+
[ 0 6 4 30 162 36 0 0 0]
|
| 67 |
+
[ 2 7 6 24 51 174 0 0 0]
|
| 68 |
+
[ 0 0 0 0 0 0 152 2 6]
|
| 69 |
+
[ 0 0 0 0 0 0 2 170 16]
|
| 70 |
+
[ 0 0 0 0 0 0 12 20 357]]
|
| 71 |
+
f1_macro = 0.7004242600927751
|
| 72 |
+
f1_weighted = 0.7538136835232475
|
| 73 |
+
evaluate 4
|
| 74 |
+
acc = 0.7628586897671901
|
| 75 |
+
acc_and_f1_macro = 0.737212156890211
|
| 76 |
+
acc_and_f1_weighted = 0.7611269408184007
|
| 77 |
+
class_f1 = [np.float64(0.37288135593220334), np.float64(0.745920745920746), np.float64(0.8105263157894737), np.float64(0.42857142857142855), np.float64(0.6252354048964218), np.float64(0.6502835538752363), np.float64(0.9440993788819876), np.float64(0.8852459016393442), np.float64(0.9413265306122449)]
|
| 78 |
+
class_p = [np.float64(0.6470588235294118), np.float64(0.7373271889400922), np.float64(0.8148148148148148), np.float64(0.48091603053435117), np.float64(0.5665529010238908), np.float64(0.6490566037735849), np.float64(0.9382716049382716), np.float64(0.9101123595505618), np.float64(0.9341772151898734)]
|
| 79 |
+
class_r = [np.float64(0.2619047619047619), np.float64(0.7547169811320755), np.float64(0.806282722513089), np.float64(0.38650306748466257), np.float64(0.6974789915966386), np.float64(0.6515151515151515), np.float64(0.95), np.float64(0.8617021276595744), np.float64(0.9485861182519281)]
|
| 80 |
+
confusion_matrix = [[ 11 4 0 8 3 16 0 0 0]
|
| 81 |
+
[ 0 160 18 0 20 14 0 0 0]
|
| 82 |
+
[ 0 29 154 2 6 0 0 0 0]
|
| 83 |
+
[ 2 7 9 63 47 35 0 0 0]
|
| 84 |
+
[ 2 8 4 30 166 28 0 0 0]
|
| 85 |
+
[ 2 9 4 28 49 172 0 0 0]
|
| 86 |
+
[ 0 0 0 0 0 0 152 2 6]
|
| 87 |
+
[ 0 0 0 0 0 0 6 162 20]
|
| 88 |
+
[ 0 0 0 0 2 0 4 14 369]]
|
| 89 |
+
f1_macro = 0.7115656240132319
|
| 90 |
+
f1_weighted = 0.7593951918696112
|
| 91 |
+
evaluate 5
|
| 92 |
+
acc = 0.7661072008662696
|
| 93 |
+
acc_and_f1_macro = 0.7427259572257612
|
| 94 |
+
acc_and_f1_weighted = 0.7649645956371721
|
| 95 |
+
class_f1 = [np.float64(0.4), np.float64(0.7453703703703703), np.float64(0.8148148148148147), np.float64(0.4615384615384615), np.float64(0.6132264529058116), np.float64(0.6679035250463821), np.float64(0.9433962264150944), np.float64(0.891304347826087), np.float64(0.9365482233502538)]
|
| 96 |
+
class_p = [np.float64(0.6666666666666666), np.float64(0.7318181818181818), np.float64(0.8235294117647058), np.float64(0.48322147651006714), np.float64(0.5862068965517241), np.float64(0.6545454545454545), np.float64(0.9493670886075949), np.float64(0.9111111111111111), np.float64(0.924812030075188)]
|
| 97 |
+
class_r = [np.float64(0.2857142857142857), np.float64(0.7594339622641509), np.float64(0.806282722513089), np.float64(0.44171779141104295), np.float64(0.6428571428571429), np.float64(0.6818181818181818), np.float64(0.9375), np.float64(0.8723404255319149), np.float64(0.9485861182519281)]
|
| 98 |
+
confusion_matrix = [[ 12 4 0 10 2 14 0 0 0]
|
| 99 |
+
[ 0 161 17 0 20 14 0 0 0]
|
| 100 |
+
[ 0 29 154 2 6 0 0 0 0]
|
| 101 |
+
[ 2 8 8 72 40 33 0 0 0]
|
| 102 |
+
[ 2 8 4 37 153 34 0 0 0]
|
| 103 |
+
[ 2 10 4 28 40 180 0 0 0]
|
| 104 |
+
[ 0 0 0 0 0 0 150 2 8]
|
| 105 |
+
[ 0 0 0 0 0 0 2 164 22]
|
| 106 |
+
[ 0 0 0 0 0 0 6 14 369]]
|
| 107 |
+
f1_macro = 0.7193447135852529
|
| 108 |
+
f1_weighted = 0.7638219904080746
|
| 109 |
+
evaluate 6
|
| 110 |
+
acc = 0.7682728749323227
|
| 111 |
+
acc_and_f1_macro = 0.7443272819935483
|
| 112 |
+
acc_and_f1_weighted = 0.7677134096600673
|
| 113 |
+
class_f1 = [np.float64(0.37499999999999994), np.float64(0.7623529411764706), np.float64(0.8275862068965517), np.float64(0.4652567975830815), np.float64(0.6150712830957229), np.float64(0.6691729323308271), np.float64(0.9382716049382716), np.float64(0.8938547486033519), np.float64(0.9368686868686869)]
|
| 114 |
+
class_p = [np.float64(0.5454545454545454), np.float64(0.7605633802816901), np.float64(0.8387096774193549), np.float64(0.4583333333333333), np.float64(0.5968379446640316), np.float64(0.664179104477612), np.float64(0.926829268292683), np.float64(0.9411764705882353), np.float64(0.9205955334987593)]
|
| 115 |
+
class_r = [np.float64(0.2857142857142857), np.float64(0.7641509433962265), np.float64(0.8167539267015707), np.float64(0.4723926380368098), np.float64(0.634453781512605), np.float64(0.6742424242424242), np.float64(0.95), np.float64(0.851063829787234), np.float64(0.9537275064267352)]
|
| 116 |
+
confusion_matrix = [[ 12 4 0 12 2 12 0 0 0]
|
| 117 |
+
[ 0 162 14 2 20 14 0 0 0]
|
| 118 |
+
[ 0 25 156 2 8 0 0 0 0]
|
| 119 |
+
[ 2 8 8 77 34 34 0 0 0]
|
| 120 |
+
[ 2 8 4 43 151 30 0 0 0]
|
| 121 |
+
[ 6 6 4 32 38 178 0 0 0]
|
| 122 |
+
[ 0 0 0 0 0 0 152 0 8]
|
| 123 |
+
[ 0 0 0 0 0 0 4 160 24]
|
| 124 |
+
[ 0 0 0 0 0 0 8 10 371]]
|
| 125 |
+
f1_macro = 0.7203816890547738
|
| 126 |
+
f1_weighted = 0.7671539443878119
|
| 127 |
+
evaluate 7
|
| 128 |
+
acc = 0.7650243638332431
|
| 129 |
+
acc_and_f1_macro = 0.738707225890205
|
| 130 |
+
acc_and_f1_weighted = 0.7636431511337718
|
| 131 |
+
class_f1 = [np.float64(0.3448275862068965), np.float64(0.7517401392111369), np.float64(0.8086253369272237), np.float64(0.44594594594594594), np.float64(0.6067415730337079), np.float64(0.6377358490566037), np.float64(0.9454545454545454), np.float64(0.9190600522193213), np.float64(0.9513797634691195)]
|
| 132 |
+
class_p = [np.float64(0.625), np.float64(0.7397260273972602), np.float64(0.8333333333333334), np.float64(0.49624060150375937), np.float64(0.5472972972972973), np.float64(0.6353383458646616), np.float64(0.9176470588235294), np.float64(0.9025641025641026), np.float64(0.9731182795698925)]
|
| 133 |
+
class_r = [np.float64(0.23809523809523808), np.float64(0.7641509433962265), np.float64(0.7853403141361257), np.float64(0.4049079754601227), np.float64(0.680672268907563), np.float64(0.6401515151515151), np.float64(0.975), np.float64(0.9361702127659575), np.float64(0.9305912596401028)]
|
| 134 |
+
confusion_matrix = [[ 10 4 0 9 3 16 0 0 0]
|
| 135 |
+
[ 0 162 14 2 20 14 0 0 0]
|
| 136 |
+
[ 0 31 150 2 8 0 0 0 0]
|
| 137 |
+
[ 2 8 8 66 44 35 0 0 0]
|
| 138 |
+
[ 2 8 4 30 162 32 0 0 0]
|
| 139 |
+
[ 2 6 4 24 59 169 0 0 0]
|
| 140 |
+
[ 0 0 0 0 0 0 156 2 2]
|
| 141 |
+
[ 0 0 0 0 0 0 4 176 8]
|
| 142 |
+
[ 0 0 0 0 0 0 10 17 362]]
|
| 143 |
+
f1_macro = 0.7123900879471667
|
| 144 |
+
f1_weighted = 0.7622619384343003
|
| 145 |
+
evaluate 8
|
| 146 |
+
acc = 0.7623172712506767
|
| 147 |
+
acc_and_f1_macro = 0.734596760229649
|
| 148 |
+
acc_and_f1_weighted = 0.7605790096477796
|
| 149 |
+
class_f1 = [np.float64(0.3157894736842105), np.float64(0.7632183908045977), np.float64(0.802168021680217), np.float64(0.4413793103448276), np.float64(0.6078799249530957), np.float64(0.6305970149253731), np.float64(0.9454545454545454), np.float64(0.9095607235142119), np.float64(0.9458388375165125)]
|
| 150 |
+
class_p = [np.float64(0.6), np.float64(0.7443946188340808), np.float64(0.8314606741573034), np.float64(0.5039370078740157), np.float64(0.5491525423728814), np.float64(0.6213235294117647), np.float64(0.9176470588235294), np.float64(0.8844221105527639), np.float64(0.9728260869565217)]
|
| 151 |
+
class_r = [np.float64(0.21428571428571427), np.float64(0.7830188679245284), np.float64(0.774869109947644), np.float64(0.39263803680981596), np.float64(0.680672268907563), np.float64(0.6401515151515151), np.float64(0.975), np.float64(0.9361702127659575), np.float64(0.9203084832904884)]
|
| 152 |
+
confusion_matrix = [[ 9 4 0 9 4 16 0 0 0]
|
| 153 |
+
[ 0 166 14 0 18 14 0 0 0]
|
| 154 |
+
[ 0 33 148 2 8 0 0 0 0]
|
| 155 |
+
[ 2 8 8 64 44 37 0 0 0]
|
| 156 |
+
[ 2 6 4 28 162 36 0 0 0]
|
| 157 |
+
[ 2 6 4 24 59 169 0 0 0]
|
| 158 |
+
[ 0 0 0 0 0 0 156 2 2]
|
| 159 |
+
[ 0 0 0 0 0 0 4 176 8]
|
| 160 |
+
[ 0 0 0 0 0 0 10 21 358]]
|
| 161 |
+
f1_macro = 0.7068762492086212
|
| 162 |
+
f1_weighted = 0.7588407480448823
|
| 163 |
+
evaluate 9
|
| 164 |
+
acc = 0.7617758527341635
|
| 165 |
+
acc_and_f1_macro = 0.7358060820649521
|
| 166 |
+
acc_and_f1_weighted = 0.7604888182739137
|
| 167 |
+
class_f1 = [np.float64(0.3448275862068965), np.float64(0.7552447552447552), np.float64(0.8150134048257371), np.float64(0.44000000000000006), np.float64(0.6037735849056604), np.float64(0.6377358490566037), np.float64(0.9390243902439025), np.float64(0.9095607235142119), np.float64(0.9433465085638999)]
|
| 168 |
+
class_p = [np.float64(0.625), np.float64(0.7465437788018433), np.float64(0.8351648351648352), np.float64(0.48175182481751827), np.float64(0.547945205479452), np.float64(0.6353383458646616), np.float64(0.9166666666666666), np.float64(0.8844221105527639), np.float64(0.9675675675675676)]
|
| 169 |
+
class_r = [np.float64(0.23809523809523808), np.float64(0.7641509433962265), np.float64(0.7958115183246073), np.float64(0.4049079754601227), np.float64(0.6722689075630253), np.float64(0.6401515151515151), np.float64(0.9625), np.float64(0.9361702127659575), np.float64(0.9203084832904884)]
|
| 170 |
+
confusion_matrix = [[ 10 4 0 9 3 16 0 0 0]
|
| 171 |
+
[ 0 162 14 2 20 14 0 0 0]
|
| 172 |
+
[ 0 29 152 2 8 0 0 0 0]
|
| 173 |
+
[ 2 8 8 66 44 35 0 0 0]
|
| 174 |
+
[ 2 6 4 34 160 32 0 0 0]
|
| 175 |
+
[ 2 8 4 24 57 169 0 0 0]
|
| 176 |
+
[ 0 0 0 0 0 0 154 2 4]
|
| 177 |
+
[ 0 0 0 0 0 0 4 176 8]
|
| 178 |
+
[ 0 0 0 0 0 0 10 21 358]]
|
| 179 |
+
f1_macro = 0.7098363113957409
|
| 180 |
+
f1_weighted = 0.7592017838136639
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76ffdd1acf85d3e695fc365682b9e38b0d4ff61f1c531df5e0579b4792fd8dbf
|
| 3 |
+
size 498640484
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": "<s>",
|
| 5 |
+
"cls_token": "<s>",
|
| 6 |
+
"do_lower_case": false,
|
| 7 |
+
"eos_token": "</s>",
|
| 8 |
+
"errors": "replace",
|
| 9 |
+
"is_local": false,
|
| 10 |
+
"mask_token": "<mask>",
|
| 11 |
+
"model_max_length": 512,
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"tokenizer_class": "RobertaTokenizer",
|
| 15 |
+
"trim_offsets": true,
|
| 16 |
+
"unk_token": "<unk>"
|
| 17 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab94a8c18cb3f6b61dcbec326bd70cc7c3f3c6b513588ef140b4d58ab1d99cd2
|
| 3 |
+
size 2769
|