{ "cells": [ { "cell_type": "code", "execution_count": 9, "id": "da16a2e2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ CUDA 사용 가능: True\n", "🔢 CUDA 장치 수: 1\n", "🔥 PyTorch 버전: 2.7.1+cu128\n", "🖥️ CUDA 장치 이름: NVIDIA GeForce RTX 3060 Laptop GPU\n", "🧱 PyTorch 빌드된 CUDA 버전: 12.8\n" ] } ], "source": [ "# CUDA test\n", "import torch\n", "\n", "print(\"✅ CUDA 사용 가능:\", torch.cuda.is_available())\n", "print(\"🔢 CUDA 장치 수:\", torch.cuda.device_count())\n", "print(\"🔥 PyTorch 버전:\", torch.__version__)\n", "if torch.cuda.is_available():\n", " print(\"🖥️ CUDA 장치 이름:\", torch.cuda.get_device_name(0))\n", " print(\"🧱 PyTorch 빌드된 CUDA 버전:\", torch.version.cuda) # type: ignore\n", "else:\n", " print(\"⚠️ CUDA를 인식하지 못했습니다.\")\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "630dd7ad", "metadata": {}, "outputs": [], "source": [ "from Models.Vector2MIDI import Vector2MIDI # 클래스 정의가 필요\n", "import torch.optim as optim\n", "import torch\n", "\n", "device = torch.device(\"cuda\") # GPU 사용\n", "#device = torch.device(\"cpu\") # CPU 사용\n", "\n", "model = Vector2MIDI(hidden_dim=1024).to(device)\n", "optimizer = optim.Adam(model.parameters(), lr=15e-5)" ] }, { "cell_type": "code", "execution_count": 2, "id": "f8c4a838", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "X_tensor shape: torch.Size([34, 25])\n", "Y_tensor shape: torch.Size([34, 128, 7])\n" ] } ], "source": [ "# 전처리 데이터 로드\n", "from torch.utils.data import DataLoader\n", "from utility.dataset import MIDIDataset\n", "import torch\n", "\n", "data = torch.load(\"DIVA_dataset.pt\")\n", "X_tensor = data[\"X\"]\n", "Y_tensor = data[\"Y\"]\n", "\n", "print(\"X_tensor shape:\", X_tensor.shape)\n", "print(\"Y_tensor shape:\", Y_tensor.shape)\n", "\n", "dataset = MIDIDataset(X_tensor, Y_tensor) # 객체를 만들어서 쉽게 tensor를 꺼낼 수 있게 함\n", "dataloader = DataLoader(dataset, batch_size=8, shuffle=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "4e0ea127", "metadata": {}, "outputs": [], "source": [ "# 디버깅 환경변수 설정\n", "\n", "import os\n", "\n", "os.environ['CUDA_LAUNCH_BLOCKING'] = \"1\"\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" ] }, { "cell_type": "code", "execution_count": 4, "id": "16a14b5f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\rrayy\\AppData\\Local\\Temp\\ipykernel_20852\\858792003.py:18: UserWarning: Anomaly Detection has been enabled. This mode will increase the runtime and should only be enabled for debugging.\n", " with torch.autograd.detect_anomaly(): # loss nan 발생 시 연산오류 출력\n", "c:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\pysdtw\\sdtw_cuda.py:19: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\pytorch\\torch\\csrc\\tensor\\python_tensor.cpp:80.)\n", " gamma = torch.cuda.FloatTensor([gamma])\n", "c:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\numba\\cuda\\dispatcher.py:536: NumbaPerformanceWarning: \u001b[1mGrid size 8 will likely result in GPU under-utilization due to low occupancy.\u001b[0m\n", " warn(NumbaPerformanceWarning(msg))\n", "C:\\Users\\rrayy\\AppData\\Local\\Temp\\ipykernel_20852\\858792003.py:18: UserWarning: Anomaly Detection has been enabled. This mode will increase the runtime and should only be enabled for debugging.\n", " with torch.autograd.detect_anomaly(): # loss nan 발생 시 연산오류 출력\n", "c:\\Users\\rrayy\\anaconda3\\envs\\diva\\Lib\\site-packages\\numba\\cuda\\dispatcher.py:536: NumbaPerformanceWarning: \u001b[1mGrid size 2 will likely result in GPU under-utilization due to low occupancy.\u001b[0m\n", " warn(NumbaPerformanceWarning(msg))\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1, Loss: 4.0590, Best: 4.0590\n", "Epoch 2, Loss: 4.8194, Best: 4.0590\n", "Epoch 3, Loss: 4.8279, Best: 4.0590\n", "Epoch 4, Loss: 3.7606, Best: 3.7606\n", "Epoch 5, Loss: 4.2284, Best: 3.7606\n", "Epoch 6, Loss: 4.5423, Best: 3.7606\n", "Epoch 7, Loss: 4.5410, Best: 3.7606\n", "Epoch 8, Loss: 4.1297, Best: 3.7606\n", "Epoch 9, Loss: 4.2703, Best: 3.7606\n", "Epoch 10, Loss: 3.6612, Best: 3.6612\n", "Epoch 11, Loss: 4.0261, Best: 3.6612\n", "Epoch 12, Loss: 4.5013, Best: 3.6612\n", "Epoch 13, Loss: 3.8839, Best: 3.6612\n", "Epoch 14, Loss: 4.4046, Best: 3.6612\n", "Epoch 15, Loss: 4.4510, Best: 3.6612\n", "Epoch 16, Loss: 4.4549, Best: 3.6612\n", "Epoch 17, Loss: 4.1500, Best: 3.6612\n", "Epoch 18, Loss: 4.5133, Best: 3.6612\n", "Epoch 19, Loss: 3.5382, Best: 3.5382\n", "Epoch 20, Loss: 3.8307, Best: 3.5382\n", "Epoch 21, Loss: 3.8997, Best: 3.5382\n", "Epoch 22, Loss: 3.9955, Best: 3.5382\n", "Epoch 23, Loss: 3.9440, Best: 3.5382\n", "Epoch 24, Loss: 3.9549, Best: 3.5382\n", "Large gradient detected: 2.3065\n", "Epoch 25, Loss: 4.4964, Best: 3.5382\n", "Epoch 26, Loss: 4.0543, Best: 3.5382\n", "Epoch 27, Loss: 3.9648, Best: 3.5382\n", "Epoch 28, Loss: 3.9392, Best: 3.5382\n", "Epoch 29, Loss: 3.9740, Best: 3.5382\n", "Epoch 30, Loss: 3.7496, Best: 3.5382\n", "Epoch 31, Loss: 3.8895, Best: 3.5382\n", "Epoch 32, Loss: 3.8629, Best: 3.5382\n", "Epoch 33, Loss: 3.6194, Best: 3.5382\n", "Epoch 34, Loss: 4.3169, Best: 3.5382\n", "Epoch 35, Loss: 4.2583, Best: 3.5382\n", "Epoch 36, Loss: 3.9958, Best: 3.5382\n", "Epoch 37, Loss: 3.9496, Best: 3.5382\n", "Epoch 38, Loss: 3.5196, Best: 3.5196\n", "Epoch 39, Loss: 4.1977, Best: 3.5196\n", "Epoch 40, Loss: 3.8060, Best: 3.5196\n", "Epoch 41, Loss: 4.0545, Best: 3.5196\n", "Epoch 42, Loss: 3.8050, Best: 3.5196\n", "Epoch 43, Loss: 4.2777, Best: 3.5196\n", "Epoch 44, Loss: 3.8532, Best: 3.5196\n", "Epoch 45, Loss: 3.3192, Best: 3.3192\n", "Epoch 46, Loss: 4.1879, Best: 3.3192\n", "Epoch 47, Loss: 4.2158, Best: 3.3192\n", "Epoch 48, Loss: 3.9239, Best: 3.3192\n", "Epoch 49, Loss: 3.8135, Best: 3.3192\n", "Epoch 50, Loss: 3.8050, Best: 3.3192\n", "\n", "Training completed!\n" ] } ], "source": [ "import copy\n", "\n", "EPOCH = 50\n", "\n", "# 최고 성능 추적을 위한 변수들\n", "best_loss = float('inf') # 가장 좋은 loss 값\n", "best_model_state = None # 최고 성능 모델의 state_dict\n", "best_loss:float\n", "\n", "model.train()\n", "\n", "for i in range(EPOCH):\n", " total_loss = 0\n", " for X_batch, Y_batch in dataloader:\n", " X_batch = X_batch.to(device, non_blocking=True) # non_blocking으로 성능 향상\n", " Y_batch = Y_batch.to(device, non_blocking=True)\n", " \n", " with torch.autograd.detect_anomaly(): # loss nan 발생 시 연산오류 출력\n", " optimizer.zero_grad()\n", " loss = model.calc_loss(X_batch, Y_batch)\n", " \n", " loss.backward()\n", "\n", " # 그래디언트 클리핑 적용\n", " grad_norm = torch.nn.utils.clip_grad_norm_(\n", " model.parameters(), \n", " max_norm=1.0\n", " )\n", " \n", " # 옵션: 그래디언트 모니터링\n", " if grad_norm > 1.0 * 2:\n", " print(f\"Large gradient detected: {grad_norm:.4f}\")\n", " \n", " optimizer.step()\n", "\n", " total_loss += loss.item()\n", "\n", " # 평균 loss 계산\n", " avg_loss = total_loss / len(dataloader)\n", " \n", " # 최고 성능 모델 업데이트\n", " if avg_loss < best_loss:\n", " best_loss = avg_loss\n", " best_model_state = copy.deepcopy(model.state_dict())# 모델의 현재 상태를 깊은 복사로 저장\n", " \n", " print(f\"Epoch {i+1}, Loss: {avg_loss:.4f}, Best: {best_loss:.4f}\")\n", "\n", "# 학습 완료 후 최고 성능 모델 로드\n", "if best_model_state != None:\n", " model.load_state_dict(best_model_state)\n", "print(f\"\\nTraining completed!\")" ] }, { "cell_type": "markdown", "id": "e610b924", "metadata": {}, "source": [ "## 모델 저장" ] }, { "cell_type": "code", "execution_count": 5, "id": "da89b45a", "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "torch.save(model.state_dict(), 'DIVA_Model_dict.pt') # 모델 가중치, 매개변수 저장\n", "torch.save(model, 'DIVA_Model_full.pt') # 모델 전체 저장" ] }, { "cell_type": "code", "execution_count": 3, "id": "75530554", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.load_state_dict(torch.load('DIVA_Model_dict.pt')) # 모델 가중치, 매개변수 불러오기" ] }, { "cell_type": "code", "execution_count": 4, "id": "6c7f2aa0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 25])\n", "torch.Size([49, 7])\n" ] }, { "data": { "text/plain": [ "'test_output.mid'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from HarmonyMIDIToken import HarmonyMIDIToken as Tokenizer\n", "from random import randint\n", "\n", "x = X_tensor[randint(0, 33)].unsqueeze(0).to(device)\n", "print(x.shape)\n", "Y = model.generate(x) # 스타일 벡터 하나로 시퀀스 생성\n", "print(Y.shape)\n", "\n", "MIDI = Tokenizer()\n", "MIDI.set_id(Y.tolist())\n", "\n", "midi= MIDI.to_midi() # This should generate MIDI from the stored melody and chords\n", "midi.write('midi', fp='test_output.mid') # Save the generated MIDI to a file" ] }, { "cell_type": "markdown", "id": "a1e4275a", "metadata": {}, "source": [ "## 차원 별로 다른 EOS 사용 시 방법\n", "\n", "---\n", "\n", "### 📌 현재 구조 요약\n", "\n", "* 입력이 다차원(예: 7개의 categorical feature)\n", "* 각 차원(feature)마다 다른 vocab이 있음\n", "* 그리고 **각 차원마다 EOS 인덱스가 다름**\n", "\n", "예:\n", "\n", "```\n", "EOS = [100, 15, 72, 14, 15, 58, 15] # len = 7 (feature 수)\n", "```\n", "\n", "이건 말하자면, **시퀀스의 각 column마다 \"끝나는 값\"이 다르다**는 뜻입니다.\n", "\n", "---\n", "\n", "### ❗ 문제: 생성 시 EOS를 어떻게 감지/제어하냐?\n", "\n", "예를 들어:\n", "\n", "* RNN/LSTM/Transformer로 다음 step을 생성할 때,\n", "* 어떤 timestep에서 **모든 feature들이 EOS 값에 도달했는지**를 판단해야\n", "* **생성 종료** (`stop generation`)를 할 수 있음\n", "\n", "---\n", "\n", "### ✅ 해결 전략\n", "\n", "#### 👇 핵심은: **각 feature의 EOS를 별도로 추적 & 마스킹**하는 것\n", "\n", "---\n", "\n", "#### ✅ 방법 1: EOS를 차원별로 체크하여 \"모두 EOS\"인지 판단\n", "\n", "```python\n", "# 예: generated: [batch, time, num_features]\n", "# EOS: list of ints, len == num_features\n", "\n", "eos_tensor = torch.tensor(EOS, device=generated.device) # [num_features]\n", "last_step = generated[:, -1, :] # [batch, num_features]\n", "\n", "# EOS 매칭 여부: [batch, num_features]\n", "eos_flags = last_step == eos_tensor # broadcasting\n", "\n", "# 모두 EOS면 -> 종료 조건\n", "should_stop = eos_flags.all(dim=-1) # [batch]\n", "```\n", "\n", "---\n", "\n", "#### ✅ 방법 2: 각 차원이 EOS이면, 해당 차원의 logits를 마스킹\n", "\n", "생성 시 특정 feature의 EOS가 나왔으면, **해당 feature 차원만 마스킹**해서 더 이상 생성 못 하게 하는 방식입니다.\n", "\n", "```python\n", "for feature_idx in range(num_features):\n", " eos_value = EOS[feature_idx]\n", "\n", " # 이전에 EOS가 나온 샘플은 더 이상 해당 feature 안 생성하게\n", " eos_mask = generated[:, :, feature_idx] == eos_value # [B, T]\n", "\n", " # logits: [B, vocab_size_per_feature]\n", " # 마스킹 적용\n", " logits[feature_idx][eos_mask] = -float('inf')\n", "```\n", "\n", "⚠️ 이건 logits를 feature마다 따로 생성하거나, feature별 decoder를 쓰는 경우에 사용 가능\n", "\n", "---\n", "\n", "#### ✅ 방법 3: 생성 과정에서 차원별 EOS 도달 상태 추적\n", "\n", "```python\n", "# eos_reached: [batch, num_features], 처음에는 False\n", "eos_reached = torch.zeros(batch_size, num_features, dtype=torch.bool, device=device)\n", "\n", "for t in range(max_steps):\n", " output = model(...)\n", "\n", " # token: [batch, num_features]\n", " token = sample_from_output(output)\n", "\n", " eos_reached |= token == eos_tensor # feature-wise EOS 체크\n", "\n", " # 모두 EOS인 sample이면 중단\n", " should_stop = eos_reached.all(dim=1) # [batch]\n", "```\n", "\n", "---\n", "\n", "#### ✅ 방법 4: EOS를 하나로 통일할 수 있다면 가장 깔끔\n", "\n", "가능하다면 EOS를 모든 feature에서 하나로 통일하는 게 가장 쉬운 방법이지만,\n", "이미 전 feature마다 vocab이 충돌해서 다른 EOS를 쓸 수밖에 없다면 위 전략들을 써야 합니다.\n", "\n", "---\n", "\n", "### 🔧 추가 제안: EOS 마스킹을 위한 구조화된 추적 클래스\n", "\n", "```python\n", "class EOSManager:\n", " def __init__(self, eos_values, num_features):\n", " self.eos_values = torch.tensor(eos_values) # [num_features]\n", " self.num_features = num_features\n", "\n", " def update(self, tokens): # tokens: [batch, num_features]\n", " return tokens == self.eos_values # [batch, num_features]\n", "\n", " def all_eos(self, flags): # flags: [batch, num_features]\n", " return flags.all(dim=1) # [batch]\n", "```\n", "\n", "이렇게 하면 깔끔하게 재사용 가능해요.\n", "\n", "---\n", "\n", "### 🧠 요약\n", "\n", "| 목적 | 방법 |\n", "| -------------------------- | ------------------------------------- |\n", "| 각 feature의 EOS 체크 | `tokens == eos_tensor` (broadcasting) |\n", "| 전체 시퀀스 종료 조건 | `.all(dim=-1)` |\n", "| feature별로 EOS 후 logits 마스킹 | logits\\[:, eos\\_idx] = -inf |\n", "| 자동 추적 관리 | `EOSManager` 같은 헬퍼 클래스 사용 |\n", "\n", "---\n", "\n", "필요하시면 생성 루프 코드나 EOS 헬퍼 전체 구조 예제도 드릴 수 있어요.\n", "사용 중인 모델 구조 (RNN, Transformer 등) 말씀주시면 거기에 맞춰 드릴게요.\n" ] } ], "metadata": { "kernelspec": { "display_name": "diva", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }