Wing4 committed on
Commit
0fbc8cf
·
verified ·
1 Parent(s): 6d467ea

Upload Phase 1 fine-tuned KcELECTRA Steam Aspect Classifier

Browse files
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🎮 KcELECTRA Steam Review Aspect Classifier (Phase 1)
2
+
3
+ Fine-tuned model based on **beomi/KcELECTRA-base**
4
+ for **Aspect-Based Sentiment Analysis (ABSA)** on Steam game reviews.
5
+
6
+ ## 📘 Model Info
7
+ - Base model: `beomi/KcELECTRA-base`
8
+ - Task: Multi-label classification (6 aspects)
9
+ - Labels:
10
+ - STORY
11
+ - OPTIMIZATION
12
+ - GRAPHICS
13
+ - PRICE_VALUE
14
+ - BALANCE
15
+ - ENGAGEMENT
16
+
17
+ ## ⚙️ Training
18
+ - Dataset: Custom labeled Steam reviews (2,349 samples)
19
+ - Loss: BCEWithLogitsLoss
20
+ - Epochs: 5
21
+ - LR: 2e-5
22
+ - Batch size: 16
23
+
24
+ ## 🧠 Usage Example
25
+ ```python
26
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
27
+
28
+ model = AutoModelForSequenceClassification.from_pretrained("Wing4/kcelectra-steam-aspect-classifier")
29
+ tokenizer = AutoTokenizer.from_pretrained("Wing4/kcelectra-steam-aspect-classifier")
30
+
31
+ inputs = tokenizer("그래픽은 좋지만 최적화가 별로야", return_tensors="pt")
32
+ outputs = model(**inputs)
33
+ print(outputs.logits.sigmoid()) # 각 측면별 확률
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "electra",
3
+ "architectures": [
4
+ "AspectClassifier"
5
+ ],
6
+ "hidden_size": 768,
7
+ "num_labels": 6,
8
+ "problem_type": "multi_label_classification",
9
+ "base_model_name": "beomi/KcELECTRA-base",
10
+ "id2label": {
11
+ "0": "STORY",
12
+ "1": "OPTIMIZATION",
13
+ "2": "GRAPHICS",
14
+ "3": "PRICE_VALUE",
15
+ "4": "BALANCE",
16
+ "5": "ENGAGEMENT"
17
+ },
18
+ "label2id": {
19
+ "STORY": 0,
20
+ "OPTIMIZATION": 1,
21
+ "GRAPHICS": 2,
22
+ "PRICE_VALUE": 3,
23
+ "BALANCE": 4,
24
+ "ENGAGEMENT": 5
25
+ }
26
+ }
phase1_aspect_classifier.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f87fb67e775644c306fa43b12a2b039f5ee390fcd7e5c0142f562fbe242dfce7
3
+ size 434067534
phase2_sentiment_classifier.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9683dfbcec037ac9e1e0a708a53f3da0d99fc584e77cbb38ac6a930502242b8
3
+ size 434086645
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff4aada1398c5a2e7c54e8c6f553f00547713e4ecfd75c3e3a166854167b607
3
+ size 434062463
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "PreTrainedTokenizerFast",
57
+ "unk_token": "[UNK]"
58
+ }