Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +59 -0
config.json +45 -0
model.safetensors +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +59 -0
training_args.bin +3 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,59 @@

+# Korean Emotion Classifier 😃😡😢😨😲😌
+본 모델은 한국어 텍스트를 **6가지 감정**(분노, 불안, 슬픔, 평온, 당황, 기쁨)으로 분류합니다.
+`klue/roberta-base` 기반으로 파인튜닝되었습니다.
+
+---
+## 📊 Evaluation Results
+| Emotion | Precision | Recall | F1-Score |
+|---------|-----------|--------|----------|
+| 분노    | 0.9801    | 0.9788 | 0.9795   |
+| 불안    | 0.9864    | 0.9848 | 0.9856   |
+| 슬픔    | 0.9837    | 0.9854 | 0.9845   |
+| 평온    | 0.9782    | 0.9750 | 0.9766   |
+| 당황    | 0.9607    | 0.9698 | 0.9652   |
+| 기쁨    | 0.9857    | 0.9886 | 0.9872   |
+
+**Accuracy**: 0.9831
+
+**Macro Avg**: Precision=0.9791 / Recall=0.9804 / F1=0.9798
+
+**Weighted Avg**: Precision=0.9831 / Recall=0.9831 / F1=0.9831
+
+```python
+from transformers import pipeline
+import torch
+model_id = "Seonghaa/korean-emotion-classifier-roberta"
+device = 0 if torch.cuda.is_available() else -1  # GPU 있으면 0, 없으면 CPU(-1)
+clf = pipeline(
+    "text-classification",
+    model=model_id,
+    tokenizer=model_id,
+    device=device
+)
+texts = [
+    "오늘 길에서 10만원을 주웠어",
+    "오늘 친구들이랑 노래방에 갔어",
+    "오늘 시험 망쳤어",
+]
+for t in texts:
+    pred = clf(t, truncation=True, max_length=256)[0]
+    print(f"입력: {t}")
+    print(f"→ 예측 감정: {pred['label']}, 점수: {pred['score']:.4f}\n")
+```
+
+출력 예시:
+
+```text
+입력: 오늘 길에서 10만원을 주웠어
+→ 예측 감정: 기쁨, 점수: 0.9619
+입력: 오늘 친구들이랑 노래방에 갔어
+→ 예측 감정: 기쁨, 점수: 0.9653
+입력: 오늘 시험 망쳤어
+→ 예측 감정: 슬픔, 점수: 0.9602
+```

config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "_name_or_path": "klue/roberta-base",
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "\ubd84\ub178",
+    "1": "\ubd88\uc548",
+    "2": "\uc2ac\ud514",
+    "3": "\ud3c9\uc628",
+    "4": "\ub2f9\ud669",
+    "5": "\uae30\uc068"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "\uae30\uc068": 5,
+    "\ub2f9\ud669": 4,
+    "\ubd84\ub178": 0,
+    "\ubd88\uc548": 1,
+    "\uc2ac\ud514": 2,
+    "\ud3c9\uc628": 3
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "tokenizer_class": "BertTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.2",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 32000
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7504fe9cd71500537d81c8a96aee67a654427c3badcc9c6b7b849d3dd4ba3971
+size 442515048

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c345af2764be8e4c253e4182835df1b916df5631874f2af26e81c6bd9e47e82e
+size 5280

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff