Changes to be committed: 모델 학습 완료

new file: DIVA_Model_dict.pt
new file: DIVA_Model_full.pt
modified: DIVA_dataset.pt
modified: preprocessing.ipynb
modified: train.ipynb

Files changed (5) hide show

DIVA_Model_dict.pt +3 -0
DIVA_Model_full.pt +3 -0
DIVA_dataset.pt +1 -1
preprocessing.ipynb +10 -10
train.ipynb +164 -25

DIVA_Model_dict.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:547e6a824560bb6f5ef6b097f468fbe6a5ec24efc9ff3d028d1e1ecedb35a0d0
+size 1517753

DIVA_Model_full.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be154505e564927f6d12e0832bf43bb3f17082c97f7408b5692b8af4e9eb851c
+size 1519609

DIVA_dataset.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d5843f8e01521ce7f7177a76674a83c6444abf577418df5de3db9a34cb5e08f
 size 328142

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e956b6342df72c271210930fb6ce094c75c61b5c9d4e155966687599912791b
 size 328142

preprocessing.ipynb CHANGED Viewed

@@ -1502,7 +1502,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "id": "f7b77c0c",
    "metadata": {},
    "outputs": [],
@@ -1540,7 +1540,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "id": "769af33a",
    "metadata": {},
    "outputs": [
@@ -1548,13 +1548,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "min target: -2\n",
-      "unique values: tensor([ -2,  -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,\n",
-      "         12,  16,  22,  31,  35,  38,  43,  44,  45,  46,  47,  48,  49,  50,\n",
-      "         51,  52,  53,  54,  55,  56,  57,  58,  59,  61,  62,  63,  64,  65,\n",
-      "         66,  67,  68,  69,  70,  71,  72,  73,  75,  76,  77,  78,  79,  80,\n",
-      "         81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,\n",
-      "         95,  96, 100, 102, 130, 202, 302])\n"
      ]
     }
    ],
@@ -1565,7 +1565,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "id": "4f5f5dc1",
    "metadata": {},
    "outputs": [],

   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "f7b77c0c",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "769af33a",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "min target: 0\n",
+      "unique values: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  16,\n",
+      "         22,  31,  35,  38,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,\n",
+      "         53,  54,  55,  56,  57,  58,  59,  62,  63,  64,  65,  66,  67,  68,\n",
+      "         69,  70,  71,  72,  73,  75,  76,  77,  78,  79,  80,  81,  82,  83,\n",
+      "         84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96, 100,\n",
+      "        102, 130, 202, 302])\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "4f5f5dc1",
    "metadata": {},
    "outputs": [],

train.ipynb CHANGED Viewed

@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
    "id": "630dd7ad",
    "metadata": {},
    "outputs": [],
@@ -42,17 +42,17 @@
     "import torch.nn as nn\n",
     "import torch\n",
     "\n",
-    "#device = torch.device(\"cuda\") # GPU 사용\n",
-    "device = torch.device(\"cpu\") # CPU 사용\n",
     "\n",
-    "model = Vector2MIDI(25, 128, 320).to(device)\n",
     "criterion = nn.CrossEntropyLoss(ignore_index=0) # 손실함수 패딩(0) 무시\n",
     "optimizer = optim.Adam(model.parameters(), lr=1e-3)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "id": "f8c4a838",
    "metadata": {},
    "outputs": [
@@ -87,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "id": "4e0ea127",
    "metadata": {},
    "outputs": [],
@@ -124,7 +124,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
    "id": "16a14b5f",
    "metadata": {},
    "outputs": [
@@ -132,26 +132,142 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "input to forward: torch.Size([8, 25])\n",
-      "outputs shape: torch.Size([8, 1185, 320])\n",
-      "Y_batch shape: torch.Size([8, 1185])\n",
-      "outputs(view) shape: torch.Size([9480, 320])\n",
-      "targets(view) shape: torch.Size([9480])\n"
      ]
     },
     {
-     "ename": "IndexError",
-     "evalue": "Target -1 is out of bounds.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mIndexError\u001b[39m                                Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[12]\u001b[39m\u001b[32m, line 25\u001b[39m\n\u001b[32m     22\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33moutputs(view) shape:\u001b[39m\u001b[33m\"\u001b[39m, outputs.shape)\n\u001b[32m     23\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mtargets(view) shape:\u001b[39m\u001b[33m\"\u001b[39m, targets.shape)\n\u001b[32m---> \u001b[39m\u001b[32m25\u001b[39m loss_f = \u001b[43mcriterion\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtargets\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     26\u001b[39m loss_f.backward()\n\u001b[32m     27\u001b[39m optimizer.step()\n",
-      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1751\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m   1749\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m   1750\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1751\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1762\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m   1757\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m   1758\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m   1759\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m   1760\u001b[39m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m   1761\u001b[39m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1762\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1764\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m   1765\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
-      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\torch\\nn\\modules\\loss.py:1297\u001b[39m, in \u001b[36mCrossEntropyLoss.forward\u001b[39m\u001b[34m(self, input, target)\u001b[39m\n\u001b[32m   1296\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor, target: Tensor) -> Tensor:\n\u001b[32m-> \u001b[39m\u001b[32m1297\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcross_entropy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   1298\u001b[39m \u001b[43m        \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m   1299\u001b[39m \u001b[43m        \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1300\u001b[39m \u001b[43m        \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1301\u001b[39m \u001b[43m        \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1302\u001b[39m \u001b[43m        \u001b[49m\u001b[43mreduction\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mreduction\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1303\u001b[39m \u001b[43m        \u001b[49m\u001b[43mlabel_smoothing\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlabel_smoothing\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1304\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\torch\\nn\\functional.py:3494\u001b[39m, in \u001b[36mcross_entropy\u001b[39m\u001b[34m(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)\u001b[39m\n\u001b[32m   3492\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m size_average \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m reduce \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m   3493\u001b[39m     reduction = _Reduction.legacy_get_string(size_average, reduce)\n\u001b[32m-> \u001b[39m\u001b[32m3494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_C\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_nn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcross_entropy_loss\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   3495\u001b[39m \u001b[43m    \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m   3496\u001b[39m \u001b[43m    \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   3497\u001b[39m \u001b[43m    \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   3498\u001b[39m \u001b[43m    \u001b[49m\u001b[43m_Reduction\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_enum\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreduction\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   3499\u001b[39m \u001b[43m    \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   3500\u001b[39m \u001b[43m    \u001b[49m\u001b[43mlabel_smoothing\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   3501\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[31mIndexError\u001b[39m: Target -1 is out of bounds."
      ]
     }
    ],
@@ -202,7 +318,30 @@
     "\n",
     "2. 토큰 매핑 수정\n",
     "- 지금 vocab_size=128이면 유효 인덱스는 0 ~ 127만 가능\n",
-    "- Rest나 특수 심볼 때문에 128이 들어갔다면 vocab_size를 129 이상으로 늘려야 함"
    ]
   }
  ],

   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "630dd7ad",
    "metadata": {},
    "outputs": [],
     "import torch.nn as nn\n",
     "import torch\n",
     "\n",
+    "device = torch.device(\"cuda\") # GPU 사용\n",
+    "#device = torch.device(\"cpu\") # CPU 사용\n",
     "\n",
+    "model = Vector2MIDI(25, 128, 303).to(device)\n",
     "criterion = nn.CrossEntropyLoss(ignore_index=0) # 손실함수 패딩(0) 무시\n",
     "optimizer = optim.Adam(model.parameters(), lr=1e-3)"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "f8c4a838",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "4e0ea127",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "id": "16a14b5f",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "input to forward: torch.Size([8, 25])\n"
      ]
     },
     {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([2, 25])\n",
+      "outputs shape: torch.Size([2, 1185, 303])\n",
+      "Y_batch shape: torch.Size([2, 1185])\n",
+      "outputs(view) shape: torch.Size([2370, 303])\n",
+      "targets(view) shape: torch.Size([2370])\n",
+      "Epoch 1, Loss: 5.5885\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([2, 25])\n",
+      "outputs shape: torch.Size([2, 1185, 303])\n",
+      "Y_batch shape: torch.Size([2, 1185])\n",
+      "outputs(view) shape: torch.Size([2370, 303])\n",
+      "targets(view) shape: torch.Size([2370])\n",
+      "Epoch 2, Loss: 4.6946\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([2, 25])\n",
+      "outputs shape: torch.Size([2, 1185, 303])\n",
+      "Y_batch shape: torch.Size([2, 1185])\n",
+      "outputs(view) shape: torch.Size([2370, 303])\n",
+      "targets(view) shape: torch.Size([2370])\n",
+      "Epoch 3, Loss: 3.0288\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([2, 25])\n",
+      "outputs shape: torch.Size([2, 1185, 303])\n",
+      "Y_batch shape: torch.Size([2, 1185])\n",
+      "outputs(view) shape: torch.Size([2370, 303])\n",
+      "targets(view) shape: torch.Size([2370])\n",
+      "Epoch 4, Loss: 2.9275\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([8, 25])\n",
+      "outputs shape: torch.Size([8, 1185, 303])\n",
+      "Y_batch shape: torch.Size([8, 1185])\n",
+      "outputs(view) shape: torch.Size([9480, 303])\n",
+      "targets(view) shape: torch.Size([9480])\n",
+      "input to forward: torch.Size([2, 25])\n",
+      "outputs shape: torch.Size([2, 1185, 303])\n",
+      "Y_batch shape: torch.Size([2, 1185])\n",
+      "outputs(view) shape: torch.Size([2370, 303])\n",
+      "targets(view) shape: torch.Size([2370])\n",
+      "Epoch 5, Loss: 2.8112\n"
      ]
     }
    ],
     "\n",
     "2. 토큰 매핑 수정\n",
     "- 지금 vocab_size=128이면 유효 인덱스는 0 ~ 127만 가능\n",
+    "- Rest나 특수 심볼 때문에 128이 들어갔다면 vocab_size를 129 이상으로 늘려야 함\n",
+    "\n",
+    "고침!!!!!!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e610b924",
+   "metadata": {},
+   "source": [
+    "## 모델 저장"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "da89b45a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "torch.save(model.state_dict(), 'DIVA_Model_dict.pt') # 모델 가중치, 매개변수 저장\n",
+    "torch.save(model, 'DIVA_Model_full.pt') # 모델 전체 저장"
    ]
   }
  ],