감정 분석 모델 업로드

Files changed (9) hide show

config.json +45 -0
label_map.json +18 -0
model.safetensors +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +60 -0
training_args.bin +3 -0
vocab.txt +0 -0
테스트용.ipynb +186 -0

config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "\uae30\uc068",
+    "1": "\ub2f9\ud669",
+    "2": "\ubd84\ub178",
+    "3": "\ubd88\uc548",
+    "4": "\uc0c1\ucc98",
+    "5": "\uc2ac\ud514"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "\uae30\uc068": 0,
+    "\ub2f9\ud669": 1,
+    "\ubd84\ub178": 2,
+    "\ubd88\uc548": 3,
+    "\uc0c1\ucc98": 4,
+    "\uc2ac\ud514": 5
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "tokenizer_class": "BertTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 32000
+}

label_map.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "label2id": {
+    "기쁨": 0,
+    "당황": 1,
+    "분노": 2,
+    "불안": 3,
+    "상처": 4,
+    "슬픔": 5
+  },
+  "id2label": {
+    "0": "기쁨",
+    "1": "당황",
+    "2": "분노",
+    "3": "불안",
+    "4": "상처",
+    "5": "슬픔"
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a461ae0713548e5d447d880adeeccbab98ba9f6042bc62322ffd0f2f362a40e7
+size 442515048

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac293ec6402dcd2b10fa9ce22c5e955dab395703b4804a91bde23710da348cbb
+size 5344

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

테스트용.ipynb ADDED Viewed

	@@ -0,0 +1,186 @@

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "HtXIxG2kUpgO"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/home/2021111971/.conda/envs/gpu_env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+            "  from .autonotebook import tqdm as notebook_tqdm\n",
+            "2025-08-17 15:05:50.559882: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+            "E0000 00:00:1755410750.582529   76530 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+            "E0000 00:00:1755410750.589567   76530 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+            "W0000 00:00:1755410750.608699   76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1755410750.608723   76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1755410750.608726   76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1755410750.608728   76530 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "2025-08-17 15:05:50.614673: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+            "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "cuda\n"
+          ]
+        }
+      ],
+      "source": [
+        "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
+        "import torch, json, os\n",
+        "import torch.nn.functional as F\n",
+        "import re\n",
+        "\n",
+        "\n",
+        "LOAD_DIR = \"/home/2021111971/todai/model2/final_model\"\n",
+        "\n",
+        "try:\n",
+        "    tok = AutoTokenizer.from_pretrained(LOAD_DIR)\n",
+        "    model = AutoModelForSequenceClassification.from_pretrained(LOAD_DIR).eval()\n",
+        "except Exception as e:\n",
+        "    print(f\"Error loading model or tokenizer from {LOAD_DIR}: {e}\")\n",
+        "    print(\"Please ensure the path is correct and the directory contains the necessary model files.\")\n",
+        "    raise\n",
+        "\n",
+        "\n",
+        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+        "print(device)\n",
+        "model.to(device)\n",
+        "\n",
+        "with open(os.path.join(LOAD_DIR, \"label_map.json\"), \"r\", encoding=\"utf-8\") as f:\n",
+        "    lm = json.load(f)\n",
+        "id2label = {int(k): v for k, v in lm[\"id2label\"].items()}\n",
+        "num_labels = len(id2label)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "VE8FmoqOUq3p"
+      },
+      "outputs": [],
+      "source": [
+        "\n",
+        "# ==== 2) 단문 예측 ====\n",
+        "def predict_emotion_and_print(text, max_len=256):\n",
+        "    with torch.no_grad():\n",
+        "        enc = tok(text, truncation=True, padding=True, max_length=max_len, return_tensors=\"pt\").to(device)\n",
+        "        probs = F.softmax(model(**enc).logits, dim=-1).cpu().numpy()[0]\n",
+        "    print(\"=== 감정 분석 결과 ===\")\n",
+        "    for lab, pct in sorted({id2label[i]: float(probs[i]*100) for i in range(num_labels)}.items(),\n",
+        "                           key=lambda x: -x[1]):\n",
+        "        print(f\"{lab:<5} : {pct:.2f}%\")\n",
+        "    print(\"======================\")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "id": "-O-jiHwiUvFx"
+      },
+      "outputs": [],
+      "source": [
+        "# ==== 3) 일기(여러 문장) → 문장 단위 집계 ====\n",
+        "# (문장마다 예측 → 개수 비율로 퍼센트 계산)\n",
+        "def split_sents(text):\n",
+        "    # 마침표/물음표/느낌표/줄바꿈 기준\n",
+        "    return [s.strip() for s in re.split(r'[.?!\\n]', text) if s.strip()]\n",
+        "\n",
+        "def analyze_diary_percent(diary_text, max_len=256, return_details=False):\n",
+        "    sents = split_sents(diary_text)\n",
+        "    if not sents:\n",
+        "        print(\"문장이 없습니다.\"); return {}\n",
+        "\n",
+        "    counts = {id2label[i]: 0 for i in range(num_labels)}\n",
+        "    details = []\n",
+        "\n",
+        "    with torch.no_grad():\n",
+        "        for s in sents:\n",
+        "            enc = tok(s, truncation=True, padding=True, max_length=max_len, return_tensors=\"pt\").to(device)\n",
+        "            logits = model(**enc).logits\n",
+        "            pred = int(logits.argmax(-1).cpu().numpy()[0])\n",
+        "            lab = id2label[pred]\n",
+        "            counts[lab] += 1\n",
+        "            if return_details: details.append((s, lab))\n",
+        "\n",
+        "    total = sum(counts.values())\n",
+        "    perc = {lab: round((counts.get(lab, 0) / total) * 100, 2) if total > 0 else 0.0 for lab in id2label.values()}\n",
+        "\n",
+        "    print(\"=== 텍스트 기반 감정 분석 ===\")\n",
+        "    for lab, pct in sorted(perc.items(), key=lambda x: -x[1]):\n",
+        "        print(f\"{lab:<5}: {pct:5.2f}% \")\n",
+        "    print(\"============================\")\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "dXzmKSI2UjOu",
+        "outputId": "02d6ce57-ce23-489a-f1ca-0a052ceb2dee"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "=== 텍스트 기반 감정 분석 ===\n",
+            "기쁨   : 66.67% \n",
+            "슬픔   : 33.33% \n",
+            "당황   :  0.00% \n",
+            "분노   :  0.00% \n",
+            "불안   :  0.00% \n",
+            "상처   :  0.00% \n",
+            "============================\n"
+          ]
+        }
+      ],
+      "source": [
+        "diary_text = \"\"\"\n",
+        "아침에 프로젝트 승인 소식을 듣고 너무 기뻤다.\n",
+        "하지만 오후에는 친한 동료가 쇠사를 고민한다는 말을 듣고 마음이 먹먹해졌다.\n",
+        "퇴근길 노을을 보며 오늘 하루를 감사한 마음으로 마무리했다.\n",
+        "\"\"\"\n",
+        "analyze_diary_percent(diary_text)\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "gpu_env",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.11.13"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}