rrayy committed on
Commit
36a0566
·
1 Parent(s): 9d1f85a

Changes to be committed: 데이터셋에 Y 길이 추가, dataset 객체로 데이터셋 관리

Browse files

modified: DIVA_dataset.pt
new file: dataset.py
modified: preprocessing.ipynb

Files changed (3) hide show
  1. DIVA_dataset.pt +2 -2
  2. dataset.py +13 -0
  3. preprocessing.ipynb +7 -3
DIVA_dataset.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2aad626b3e4e2ebfcc83c88623dbdee4d6e2ac90dcf6683a66c1b735d8fa51cf
3
- size 327629
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02800f2d52f834ea03fb42ff3d0b7338231e47838013e6cc09d3898ce0fca1bc
3
+ size 328142
dataset.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.utils.data import Dataset
2
+
3
class MIDIDataset(Dataset):
    """Map-style dataset that yields ``(x, y, length)`` triples by index.

    The three inputs are parallel containers: element ``i`` of each one
    belongs to the same sample.

    Args:
        X_tensor: Per-sample input features, indexed along the first
            dimension (original note: ``[N, feature_dim]``).
        Y_tensor: Per-sample target sequences, indexed along the first
            dimension (original note: ``[N, seq_len]``).
        lengths: Per-sample actual (unpadded) length of the corresponding
            row of ``Y_tensor`` (original note: ``[N]``).

    Raises:
        ValueError: If the three inputs do not contain the same number of
            samples.
    """

    def __init__(self, X_tensor, Y_tensor, lengths):
        n_x, n_y, n_len = len(X_tensor), len(Y_tensor), len(lengths)
        # Fail fast: __len__ only reports len(X), so a mismatch would
        # otherwise surface later as an IndexError mid-epoch, or silently
        # drop the extra samples of the longer containers.
        if not (n_x == n_y == n_len):
            raise ValueError(
                "X_tensor, Y_tensor and lengths must contain the same number "
                f"of samples, got {n_x}, {n_y} and {n_len}"
            )

        self.X = X_tensor  # [N, feature_dim]
        self.Y = Y_tensor  # [N, seq_len]
        self.lengths = lengths  # [N] actual (unpadded) length of each sample

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(x, y, length)`` triple stored at position ``idx``."""
        return self.X[idx], self.Y[idx], self.lengths[idx]
preprocessing.ipynb CHANGED
@@ -1378,7 +1378,7 @@
1378
  },
1379
  {
1380
  "cell_type": "code",
1381
- "execution_count": 4,
1382
  "id": "f7b77c0c",
1383
  "metadata": {},
1384
  "outputs": [],
@@ -1403,6 +1403,9 @@
1403
  "max_len = max(len(seq) for seq in tokens)\n",
1404
  "padded_tokens = [seq + [0]*(max_len - len(seq)) for seq in tokens]\n",
1405
  "\n",
 
 
 
1406
  "# Tensor 변환\n",
1407
  "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
1408
  "Y_tensor = torch.tensor(padded_tokens, dtype=torch.long)"
@@ -1410,7 +1413,7 @@
1410
  },
1411
  {
1412
  "cell_type": "code",
1413
- "execution_count": 5,
1414
  "id": "4f5f5dc1",
1415
  "metadata": {},
1416
  "outputs": [],
@@ -1419,7 +1422,8 @@
1419
  "\n",
1420
  "torch.save({\n",
1421
  " \"X\": X_tensor,\n",
1422
- " \"Y\": Y_tensor\n",
 
1423
  "}, \"DIVA_dataset.pt\")"
1424
  ]
1425
  },
 
1378
  },
1379
  {
1380
  "cell_type": "code",
1381
+ "execution_count": 7,
1382
  "id": "f7b77c0c",
1383
  "metadata": {},
1384
  "outputs": [],
 
1403
  "max_len = max(len(seq) for seq in tokens)\n",
1404
  "padded_tokens = [seq + [0]*(max_len - len(seq)) for seq in tokens]\n",
1405
  "\n",
1406
+ "# 각 샘플의 실제 길이\n",
1407
+ "lengths = torch.tensor([len(seq) for seq in tokens], dtype=torch.long)\n",
1408
+ "\n",
1409
  "# Tensor 변환\n",
1410
  "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
1411
  "Y_tensor = torch.tensor(padded_tokens, dtype=torch.long)"
 
1413
  },
1414
  {
1415
  "cell_type": "code",
1416
+ "execution_count": 8,
1417
  "id": "4f5f5dc1",
1418
  "metadata": {},
1419
  "outputs": [],
 
1422
  "\n",
1423
  "torch.save({\n",
1424
  " \"X\": X_tensor,\n",
1425
+ " \"Y\": Y_tensor,\n",
1426
+ " \"lengths\": lengths\n",
1427
  "}, \"DIVA_dataset.pt\")"
1428
  ]
1429
  },