jonghhhh committed on
Commit
d3f5ca9
·
verified ·
1 Parent(s): c83b252

Upload 7 files

Browse files
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "beomi/KcBERT-v2023",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1,
28
+ "LABEL_2": 2,
29
+ "LABEL_3": 3,
30
+ "LABEL_4": 4,
31
+ "LABEL_5": 5,
32
+ "LABEL_6": 6,
33
+ "LABEL_7": 7
34
+ },
35
+ "layer_norm_eps": 1e-05,
36
+ "max_position_embeddings": 514,
37
+ "model_type": "roberta",
38
+ "num_attention_heads": 12,
39
+ "num_hidden_layers": 12,
40
+ "pad_token_id": 1,
41
+ "position_embedding_type": "absolute",
42
+ "problem_type": "multi_label_classification",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.31.0",
45
+ "type_vocab_size": 1,
46
+ "use_cache": true,
47
+ "vocab_size": 50265
48
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pandas
2
+ torch
3
+ transformers
4
+ scikit-learn
5
+ datasets
6
+ numpy
7
+ streamlit
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "errors": "replace",
8
+ "mask_token": "<mask>",
9
+ "model_max_length": 1000000000000000019884624838656,
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "tokenizer_class": "RobertaTokenizer",
13
+ "trim_offsets": true,
14
+ "unk_token": "<unk>"
15
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
모델학습평가결과.txt ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ethics/텍스트윤리검증데이터/aihub_텍스트윤리검증데이터_중복제거_061424.csv
2
+
3
+ [340225, 42528, 42529]
4
+
5
+ label2id=
6
+ {'IMMORAL_NONE': 0,
7
+ 'CRIME': 1,
8
+ 'SEXUAL': 2,
9
+ 'HATE': 3,
10
+ 'DISCRIMINATION': 4,
11
+ 'CENSURE': 5,
12
+ 'ABUSE': 6,
13
+ 'VIOLENCE': 7}
14
+
15
+ ** 'eval_mean_accuracy': 0.8997096099132357
16
+ Test Results: {'eval_loss': 0.2841951251029968, 'eval_mean_accuracy': 0.8997096099132357, 'eval_f1': 0.644721372735319, 'eval_confusion_matrix': [[[20635, 3291], [4694, 13909]], [[41636, 128], [677, 88]], [[39671, 594], [1215, 1049]], [[34075, 1806], [4632, 2016]], [[37842, 823], [3098, 766]], [[18147, 4960], [5105, 14317]], [[40260, 394], [1274, 601]], [[40177, 471], [960, 921]]], 'eval_accuracy_0': 0.8122457617155353, 'eval_accuracy_1': 0.9810717392837829, 'eval_accuracy_2': 0.9574643184650474, 'eval_accuracy_3': 0.8486209410049613, 'eval_accuracy_4': 0.9078040866232453, 'eval_accuracy_5': 0.763337957628912, 'eval_accuracy_6': 0.960779703261304, 'eval_accuracy_7': 0.9663523713230971, 'eval_runtime': 75.362, 'eval_samples_per_second': 564.33, 'eval_steps_per_second': 35.283, 'epoch': 10.0}
17
+
18
+ Confusion Matrix for Label IMMORAL_NONE:
19
+ [[20635 3291]
20
+ [ 4694 13909]]
21
+ Confusion Matrix for Label CRIME:
22
+ [[41636 128]
23
+ [ 677 88]]
24
+ Confusion Matrix for Label SEXUAL:
25
+ [[39671 594]
26
+ [ 1215 1049]]
27
+ Confusion Matrix for Label HATE:
28
+ [[34075 1806]
29
+ [ 4632 2016]]
30
+ Confusion Matrix for Label DISCRIMINATION:
31
+ [[37842 823]
32
+ [ 3098 766]]
33
+ Confusion Matrix for Label CENSURE:
34
+ [[18147 4960]
35
+ [ 5105 14317]]
36
+ Confusion Matrix for Label ABUSE:
37
+ [[40260 394]
38
+ [ 1274 601]]
39
+ Confusion Matrix for Label VIOLENCE:
40
+ [[40177 471]
41
+ [ 960 921]]
42
+
43
+
44
+ 각 레이블별 정확도와 F1 점수
45
+ IMMORAL_NONE:
46
+ Accuracy: 0.7582
47
+ F1 Score: 0.7771
48
+ CRIME:
49
+ Accuracy: 0.9853
50
+ F1 Score: 0.1807
51
+ SEXUAL:
52
+ Accuracy: 0.9575
53
+ F1 Score: 0.5375
54
+ HATE:
55
+ Accuracy: 0.8760
56
+ F1 Score: 0.3861
57
+ DISCRIMINATION:
58
+ Accuracy: 0.9164
59
+ F1 Score: 0.2805
60
+ CENSURE:
61
+ Accuracy: 0.7706
62
+ F1 Score: 0.7397
63
+ ABUSE:
64
+ Accuracy: 0.9551
65
+ F1 Score: 0.4176
66
+ VIOLENCE:
67
+ Accuracy: 0.9748
68
+ F1 Score: 0.5619