JAEHYUK LEE committed on
Commit
8c48e24
·
1 Parent(s): f0ae5d2

감정 분석 모델 업로드

Browse files
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "\uae30\uc068",
15
+ "1": "\ub2f9\ud669",
16
+ "2": "\ubd84\ub178",
17
+ "3": "\ubd88\uc548",
18
+ "4": "\uc0c1\ucc98",
19
+ "5": "\uc2ac\ud514"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "\uae30\uc068": 0,
25
+ "\ub2f9\ud669": 1,
26
+ "\ubd84\ub178": 2,
27
+ "\ubd88\uc548": 3,
28
+ "\uc0c1\ucc98": 4,
29
+ "\uc2ac\ud514": 5
30
+ },
31
+ "layer_norm_eps": 1e-05,
32
+ "max_position_embeddings": 514,
33
+ "model_type": "roberta",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 1,
37
+ "position_embedding_type": "absolute",
38
+ "problem_type": "single_label_classification",
39
+ "tokenizer_class": "BertTokenizer",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.55.0",
42
+ "type_vocab_size": 1,
43
+ "use_cache": true,
44
+ "vocab_size": 32000
45
+ }
label_map.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label2id": {
3
+ "기쁨": 0,
4
+ "당황": 1,
5
+ "분노": 2,
6
+ "불안": 3,
7
+ "상처": 4,
8
+ "슬픔": 5
9
+ },
10
+ "id2label": {
11
+ "0": "기쁨",
12
+ "1": "당황",
13
+ "2": "분노",
14
+ "3": "불안",
15
+ "4": "상처",
16
+ "5": "슬픔"
17
+ }
18
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a461ae0713548e5d447d880adeeccbab98ba9f6042bc62322ffd0f2f362a40e7
3
+ size 442515048
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[SEP]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "[PAD]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[CLS]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_basic_tokenize": true,
48
+ "do_lower_case": false,
49
+ "eos_token": "[SEP]",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "[MASK]",
52
+ "model_max_length": 512,
53
+ "never_split": null,
54
+ "pad_token": "[PAD]",
55
+ "sep_token": "[SEP]",
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "unk_token": "[UNK]"
60
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac293ec6402dcd2b10fa9ce22c5e955dab395703b4804a91bde23710da348cbb
3
+ size 5344
vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
테스트용.ipynb ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "id": "HtXIxG2kUpgO"
8
+ },
9
+ "outputs": [
10
+ {
11
+ "name": "stderr",
12
+ "output_type": "stream",
13
+ "text": [
14
+ "/home/2021111971/.conda/envs/gpu_env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
15
+ " from .autonotebook import tqdm as notebook_tqdm\n",
16
+ "2025-08-17 15:05:50.559882: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
17
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
18
+ "E0000 00:00:1755410750.582529 76530 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
19
+ "E0000 00:00:1755410750.589567 76530 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
20
+ "W0000 00:00:1755410750.608699 76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
21
+ "W0000 00:00:1755410750.608723 76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
22
+ "W0000 00:00:1755410750.608726 76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
23
+ "W0000 00:00:1755410750.608728 76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
24
+ "2025-08-17 15:05:50.614673: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
25
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
26
+ ]
27
+ },
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "cuda\n"
33
+ ]
34
+ }
35
+ ],
36
+ "source": [
37
+ "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
38
+ "import torch, json, os\n",
39
+ "import torch.nn.functional as F\n",
40
+ "import re\n",
41
+ "\n",
42
+ "\n",
43
+ "LOAD_DIR = \"/home/2021111971/todai/model2/final_model\"\n",
44
+ "\n",
45
+ "try:\n",
46
+ " tok = AutoTokenizer.from_pretrained(LOAD_DIR)\n",
47
+ " model = AutoModelForSequenceClassification.from_pretrained(LOAD_DIR).eval()\n",
48
+ "except Exception as e:\n",
49
+ " print(f\"Error loading model or tokenizer from {LOAD_DIR}: {e}\")\n",
50
+ " print(\"Please ensure the path is correct and the directory contains the necessary model files.\")\n",
51
+ " raise\n",
52
+ "\n",
53
+ "\n",
54
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
55
+ "print(device)\n",
56
+ "model.to(device)\n",
57
+ "\n",
58
+ "with open(os.path.join(LOAD_DIR, \"label_map.json\"), \"r\", encoding=\"utf-8\") as f:\n",
59
+ " lm = json.load(f)\n",
60
+ "id2label = {int(k): v for k, v in lm[\"id2label\"].items()}\n",
61
+ "num_labels = len(id2label)"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 2,
67
+ "metadata": {
68
+ "id": "VE8FmoqOUq3p"
69
+ },
70
+ "outputs": [],
71
+ "source": [
72
+ "\n",
73
+ "# ==== 2) 단문 예측 ====\n",
74
+ "def predict_emotion_and_print(text, max_len=256):\n",
75
+ " with torch.no_grad():\n",
76
+ " enc = tok(text, truncation=True, padding=True, max_length=max_len, return_tensors=\"pt\").to(device)\n",
77
+ " probs = F.softmax(model(**enc).logits, dim=-1).cpu().numpy()[0]\n",
78
+ " print(\"=== 감정 분석 결과 ===\")\n",
79
+ " for lab, pct in sorted({id2label[i]: float(probs[i]*100) for i in range(num_labels)}.items(),\n",
80
+ " key=lambda x: -x[1]):\n",
81
+ " print(f\"{lab:<5} : {pct:.2f}%\")\n",
82
+ " print(\"======================\")\n"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 3,
88
+ "metadata": {
89
+ "id": "-O-jiHwiUvFx"
90
+ },
91
+ "outputs": [],
92
+ "source": [
93
+ "# ==== 3) 일기(여러 문장) → 문장 단위 집계 ====\n",
94
+ "# (문장마다 예측 → 개수 비율로 퍼센트 계산)\n",
95
+ "def split_sents(text):\n",
96
+ " # 마침표/물음표/느낌표/줄바꿈 기준\n",
97
+ " return [s.strip() for s in re.split(r'[.?!\\n]', text) if s.strip()]\n",
98
+ "\n",
99
+ "def analyze_diary_percent(diary_text, max_len=256, return_details=False):\n",
100
+ " sents = split_sents(diary_text)\n",
101
+ " if not sents:\n",
102
+ " print(\"문장이 없습니다.\"); return {}\n",
103
+ "\n",
104
+ " counts = {id2label[i]: 0 for i in range(num_labels)}\n",
105
+ " details = []\n",
106
+ "\n",
107
+ " with torch.no_grad():\n",
108
+ " for s in sents:\n",
109
+ " enc = tok(s, truncation=True, padding=True, max_length=max_len, return_tensors=\"pt\").to(device)\n",
110
+ " logits = model(**enc).logits\n",
111
+ " pred = int(logits.argmax(-1).cpu().numpy()[0])\n",
112
+ " lab = id2label[pred]\n",
113
+ " counts[lab] += 1\n",
114
+ " if return_details: details.append((s, lab))\n",
115
+ "\n",
116
+ " total = sum(counts.values())\n",
117
+ " perc = {lab: round((counts.get(lab, 0) / total) * 100, 2) if total > 0 else 0.0 for lab in id2label.values()}\n",
118
+ "\n",
119
+ " print(\"=== 텍스트 기반 감정 분석 ===\")\n",
120
+ " for lab, pct in sorted(perc.items(), key=lambda x: -x[1]):\n",
121
+ " print(f\"{lab:<5}: {pct:5.2f}% \")\n",
122
+ " print(\"============================\")\n",
123
+ "\n"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 4,
129
+ "metadata": {
130
+ "colab": {
131
+ "base_uri": "https://localhost:8080/"
132
+ },
133
+ "id": "dXzmKSI2UjOu",
134
+ "outputId": "02d6ce57-ce23-489a-f1ca-0a052ceb2dee"
135
+ },
136
+ "outputs": [
137
+ {
138
+ "name": "stdout",
139
+ "output_type": "stream",
140
+ "text": [
141
+ "=== 텍스트 기반 감정 분석 ===\n",
142
+ "기쁨 : 66.67% \n",
143
+ "슬픔 : 33.33% \n",
144
+ "당황 : 0.00% \n",
145
+ "분노 : 0.00% \n",
146
+ "불안 : 0.00% \n",
147
+ "상처 : 0.00% \n",
148
+ "============================\n"
149
+ ]
150
+ }
151
+ ],
152
+ "source": [
153
+ "diary_text = \"\"\"\n",
154
+ "아침에 프로젝트 승인 소식을 듣고 너무 기뻤다.\n",
155
+ "하지만 오후에는 친한 동료가 쇠사를 고민한다는 말을 듣고 마음이 먹먹해졌다.\n",
156
+ "퇴근길 노을을 보며 오늘 하루를 감사한 마음으로 마무리했다.\n",
157
+ "\"\"\"\n",
158
+ "analyze_diary_percent(diary_text)\n"
159
+ ]
160
+ }
161
+ ],
162
+ "metadata": {
163
+ "colab": {
164
+ "provenance": []
165
+ },
166
+ "kernelspec": {
167
+ "display_name": "gpu_env",
168
+ "language": "python",
169
+ "name": "python3"
170
+ },
171
+ "language_info": {
172
+ "codemirror_mode": {
173
+ "name": "ipython",
174
+ "version": 3
175
+ },
176
+ "file_extension": ".py",
177
+ "mimetype": "text/x-python",
178
+ "name": "python",
179
+ "nbconvert_exporter": "python",
180
+ "pygments_lexer": "ipython3",
181
+ "version": "3.11.13"
182
+ }
183
+ },
184
+ "nbformat": 4,
185
+ "nbformat_minor": 0
186
+ }