rrayy
committed on
Commit
·
36a0566
1
Parent(s):
9d1f85a
Changes to be committed: 데이터셋에 Y 길이 추가, dataset 객체로 데이터셋 관리
Browse files
modified: DIVA_dataset.pt
new file: dataset.py
modified: preprocessing.ipynb
- DIVA_dataset.pt +2 -2
- dataset.py +13 -0
- preprocessing.ipynb +7 -3
DIVA_dataset.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02800f2d52f834ea03fb42ff3d0b7338231e47838013e6cc09d3898ce0fca1bc
|
| 3 |
+
size 328142
|
dataset.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch.utils.data import Dataset
|
| 2 |
+
|
| 3 |
+
class MIDIDataset(Dataset):
    """Map-style dataset that serves ``(X, Y, length)`` triples by index.

    The three inputs are stored as given and indexed in lockstep, so they
    must all contain the same number of samples along their first axis.

    Args:
        X_tensor: Input features, expected shape ``[N, feature_dim]``.
        Y_tensor: Target token sequences, expected shape ``[N, seq_len]``.
        lengths: Actual length of each target sequence, expected shape
            ``[N]``.

    Raises:
        ValueError: If the three inputs do not hold the same number of
            samples.
    """

    def __init__(self, X_tensor, Y_tensor, lengths):
        n_samples = len(X_tensor)
        # __len__ only looks at X, so a mismatch would otherwise surface as
        # a late IndexError mid-iteration or as silently dropped samples.
        if len(Y_tensor) != n_samples or len(lengths) != n_samples:
            raise ValueError(
                "X_tensor, Y_tensor and lengths must have the same number "
                f"of samples, got {n_samples}, {len(Y_tensor)} and "
                f"{len(lengths)}"
            )
        self.X = X_tensor  # [N, feature_dim]
        self.Y = Y_tensor  # [N, seq_len]
        self.lengths = lengths  # [N] actual length of each sequence

    def __len__(self):
        """Return the number of samples (size of ``X`` along axis 0)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(X, Y, length)`` triple stored at position ``idx``."""
        return self.X[idx], self.Y[idx], self.lengths[idx]
|
preprocessing.ipynb
CHANGED
|
@@ -1378,7 +1378,7 @@
|
|
| 1378 |
},
|
| 1379 |
{
|
| 1380 |
"cell_type": "code",
|
| 1381 |
-
"execution_count":
|
| 1382 |
"id": "f7b77c0c",
|
| 1383 |
"metadata": {},
|
| 1384 |
"outputs": [],
|
|
@@ -1403,6 +1403,9 @@
|
|
| 1403 |
"max_len = max(len(seq) for seq in tokens)\n",
|
| 1404 |
"padded_tokens = [seq + [0]*(max_len - len(seq)) for seq in tokens]\n",
|
| 1405 |
"\n",
|
|
|
|
|
|
|
|
|
|
| 1406 |
"# Tensor 변환\n",
|
| 1407 |
"X_tensor = torch.tensor(X, dtype=torch.float32)\n",
|
| 1408 |
"Y_tensor = torch.tensor(padded_tokens, dtype=torch.long)"
|
|
@@ -1410,7 +1413,7 @@
|
|
| 1410 |
},
|
| 1411 |
{
|
| 1412 |
"cell_type": "code",
|
| 1413 |
-
"execution_count":
|
| 1414 |
"id": "4f5f5dc1",
|
| 1415 |
"metadata": {},
|
| 1416 |
"outputs": [],
|
|
@@ -1419,7 +1422,8 @@
|
|
| 1419 |
"\n",
|
| 1420 |
"torch.save({\n",
|
| 1421 |
" \"X\": X_tensor,\n",
|
| 1422 |
-
" \"Y\": Y_tensor\n",
|
|
|
|
| 1423 |
"}, \"DIVA_dataset.pt\")"
|
| 1424 |
]
|
| 1425 |
},
|
|
|
|
| 1378 |
},
|
| 1379 |
{
|
| 1380 |
"cell_type": "code",
|
| 1381 |
+
"execution_count": 7,
|
| 1382 |
"id": "f7b77c0c",
|
| 1383 |
"metadata": {},
|
| 1384 |
"outputs": [],
|
|
|
|
| 1403 |
"max_len = max(len(seq) for seq in tokens)\n",
|
| 1404 |
"padded_tokens = [seq + [0]*(max_len - len(seq)) for seq in tokens]\n",
|
| 1405 |
"\n",
|
| 1406 |
+
"# 각 샘플의 실제 길이\n",
|
| 1407 |
+
"lengths = torch.tensor([len(seq) for seq in tokens], dtype=torch.long)\n",
|
| 1408 |
+
"\n",
|
| 1409 |
"# Tensor 변환\n",
|
| 1410 |
"X_tensor = torch.tensor(X, dtype=torch.float32)\n",
|
| 1411 |
"Y_tensor = torch.tensor(padded_tokens, dtype=torch.long)"
|
|
|
|
| 1413 |
},
|
| 1414 |
{
|
| 1415 |
"cell_type": "code",
|
| 1416 |
+
"execution_count": 8,
|
| 1417 |
"id": "4f5f5dc1",
|
| 1418 |
"metadata": {},
|
| 1419 |
"outputs": [],
|
|
|
|
| 1422 |
"\n",
|
| 1423 |
"torch.save({\n",
|
| 1424 |
" \"X\": X_tensor,\n",
|
| 1425 |
+
" \"Y\": Y_tensor,\n",
|
| 1426 |
+
" \"lengths\": lengths\n",
|
| 1427 |
"}, \"DIVA_dataset.pt\")"
|
| 1428 |
]
|
| 1429 |
},
|