rrayy committed on
Commit
788e3af
·
1 Parent(s): 1ba869c

Changes to be committed: 토크나이저에 맞게 데이터 수정

Browse files

modified: DIVA_dataset.pt
modified: data/11.mid
modified: data/12.mid
modified: data/13.mid
modified: data/15.mid
modified: data/33.mid
modified: data/7.mid
modified: preprocessing.ipynb

Files changed (8)
  1. DIVA_dataset.pt +2 -2
  2. data/11.mid +0 -0
  3. data/12.mid +0 -0
  4. data/13.mid +0 -0
  5. data/15.mid +0 -0
  6. data/33.mid +0 -0
  7. data/7.mid +0 -0
  8. preprocessing.ipynb +133 -135
DIVA_dataset.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd45bdbfcc51d36e4a29718f12e9ca4a09d08ed2b429db7801d93969ba175591
3
- size 245710
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6597984bfb6a99a95e1c10ce7293080d95d976f284916d4b8aeff4582f2f22b7
3
+ size 249037
data/11.mid CHANGED
Binary files a/data/11.mid and b/data/11.mid differ
 
data/12.mid CHANGED
Binary files a/data/12.mid and b/data/12.mid differ
 
data/13.mid CHANGED
Binary files a/data/13.mid and b/data/13.mid differ
 
data/15.mid CHANGED
Binary files a/data/15.mid and b/data/15.mid differ
 
data/33.mid CHANGED
Binary files a/data/33.mid and b/data/33.mid differ
 
data/7.mid CHANGED
Binary files a/data/7.mid and b/data/7.mid differ
 
preprocessing.ipynb CHANGED
@@ -336,8 +336,6 @@
336
  " EOS = torch.full((1, 7), 100, dtype=torch.long)\n",
337
  " Y_tensor.append(torch.cat([token, EOS], dim=0))\n",
338
  "\n",
339
- "seq_lengths = torch.tensor([len(seq) for seq in Y_tensor])\n",
340
- "\n",
341
  "# 패딩 처리\n",
342
  "padded_Y = pad_sequence(Y_tensor, batch_first=True, padding_value=-1) # (batch_size, max_len, 7)"
343
  ]
@@ -353,20 +351,18 @@
353
  "output_type": "stream",
354
  "text": [
355
  "X shape: torch.Size([34, 25])\n",
356
- "Y shape: torch.Size([34, 126, 7])\n",
357
- "l shape: torch.Size([34])\n"
358
  ]
359
  }
360
  ],
361
  "source": [
362
  "print(\"X shape:\", X_tensor.shape)\n",
363
- "print(\"Y shape:\", padded_Y.shape)\n",
364
- "print(\"l shape:\", seq_lengths.shape)"
365
  ]
366
  },
367
  {
368
  "cell_type": "code",
369
- "execution_count": 4,
370
  "id": "b4efc676",
371
  "metadata": {},
372
  "outputs": [
@@ -374,131 +370,134 @@
374
  "name": "stdout",
375
  "output_type": "stream",
376
  "text": [
377
- "Y example: tensor([[81, 3, 65, 1, 3, 53, 3],\n",
378
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
379
- " [81, 2, 65, 1, 2, 53, 2],\n",
380
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
381
- " [81, 1, 65, 1, 1, 53, 1],\n",
382
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
383
- " [79, 2, 65, 1, 2, 53, 2],\n",
384
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
385
- " [79, 2, 65, 1, 2, 53, 2],\n",
386
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
387
- " [84, 1, 60, 8, 1, 55, 1],\n",
388
- " [84, 2, 60, 8, 2, 55, 2],\n",
389
- " [ 0, 2, 0, 1, 2, 0, 2],\n",
390
- " [84, 2, 60, 8, 2, 55, 2],\n",
391
- " [83, 1, 60, 8, 1, 55, 1],\n",
392
- " [84, 2, 60, 8, 2, 55, 2],\n",
393
- " [79, 1, 60, 8, 2, 55, 2],\n",
394
- " [ 0, 1, 0, 1, 0, 0, 0],\n",
395
- " [83, 2, 0, 1, 1, 0, 1],\n",
396
- " [ 0, 0, 60, 8, 2, 55, 2],\n",
397
- " [ 0, 2, 0, 1, 2, 48, 2],\n",
398
- " [84, 3, 69, 5, 3, 57, 3],\n",
399
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
400
- " [84, 2, 69, 5, 2, 57, 2],\n",
401
- " [83, 1, 69, 5, 1, 57, 1],\n",
402
- " [84, 2, 69, 5, 2, 57, 2],\n",
403
- " [79, 1, 69, 5, 1, 57, 1],\n",
404
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
405
- " [83, 2, 69, 5, 2, 57, 2],\n",
406
- " [ 0, 2, 0, 1, 2, 50, 2],\n",
407
- " [81, 2, 67, 1, 2, 55, 2],\n",
408
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
409
- " [81, 2, 67, 1, 2, 55, 2],\n",
410
- " [83, 1, 67, 1, 1, 55, 1],\n",
411
- " [84, 2, 67, 1, 2, 55, 2],\n",
412
- " [83, 2, 67, 1, 2, 55, 2],\n",
413
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
414
- " [83, 2, 67, 1, 2, 55, 2],\n",
415
- " [ 0, 2, 0, 1, 2, 48, 2],\n",
416
- " [81, 3, 65, 1, 3, 53, 3],\n",
417
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
418
- " [81, 2, 65, 1, 2, 53, 2],\n",
419
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
420
- " [81, 1, 65, 1, 1, 53, 1],\n",
421
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
422
- " [79, 2, 65, 1, 2, 53, 2],\n",
423
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
424
- " [79, 2, 65, 1, 2, 53, 2],\n",
425
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
426
- " [84, 1, 60, 8, 1, 55, 1],\n",
427
- " [84, 2, 60, 8, 2, 55, 2],\n",
428
- " [ 0, 2, 0, 1, 2, 0, 2],\n",
429
- " [84, 2, 60, 8, 2, 55, 2],\n",
430
- " [83, 1, 60, 8, 1, 55, 1],\n",
431
- " [84, 2, 60, 8, 2, 55, 2],\n",
432
- " [91, 1, 60, 8, 1, 55, 1],\n",
433
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
434
- " [83, 2, 60, 8, 2, 55, 2],\n",
435
- " [ 0, 2, 0, 1, 2, 48, 2],\n",
436
- " [84, 3, 69, 5, 3, 57, 3],\n",
437
- " [ 0, 1, 0, 1, 1, 0, 1],\n",
438
- " [84, 2, 69, 5, 2, 57, 2],\n",
439
- " [83, 1, 69, 5, 1, 57, 1],\n",
440
- " [84, 2, 69, 5, 2, 57, 2],\n",
441
- " [91, 1, 69, 5, 2, 57, 2],\n",
442
- " [ 0, 1, 0, 1, 0, 0, 0],\n",
443
- " [83, 2, 0, 1, 1, 0, 1],\n",
444
- " [ 0, 2, 69, 5, 2, 57, 2],\n",
445
- " [81, 2, 0, 1, 2, 50, 2],\n",
446
- " [ 0, 1, 67, 1, 2, 55, 2],\n",
447
- " [81, 2, 0, 1, 0, 0, 0],\n",
448
- " [86, 1, 0, 1, 1, 0, 1],\n",
449
- " [84, 2, 67, 1, 2, 55, 2],\n",
450
- " [86, 2, 67, 1, 1, 55, 1],\n",
451
- " [ 0, 0, 67, 1, 2, 55, 2],\n",
452
- " [ 0, 1, 67, 1, 2, 55, 2],\n",
453
- " [84, 2, 0, 1, 1, 0, 1],\n",
454
- " [84, 2, 67, 1, 2, 55, 2],\n",
455
- " [84, 2, 67, 1, 2, 48, 2],\n",
456
- " [-1, -1, -1, -1, -1, -1, -1],\n",
457
- " [-1, -1, -1, -1, -1, -1, -1],\n",
458
- " [-1, -1, -1, -1, -1, -1, -1],\n",
459
- " [-1, -1, -1, -1, -1, -1, -1],\n",
460
- " [-1, -1, -1, -1, -1, -1, -1],\n",
461
- " [-1, -1, -1, -1, -1, -1, -1],\n",
462
- " [-1, -1, -1, -1, -1, -1, -1],\n",
463
- " [-1, -1, -1, -1, -1, -1, -1],\n",
464
- " [-1, -1, -1, -1, -1, -1, -1],\n",
465
- " [-1, -1, -1, -1, -1, -1, -1],\n",
466
- " [-1, -1, -1, -1, -1, -1, -1],\n",
467
- " [-1, -1, -1, -1, -1, -1, -1],\n",
468
- " [-1, -1, -1, -1, -1, -1, -1],\n",
469
- " [-1, -1, -1, -1, -1, -1, -1],\n",
470
- " [-1, -1, -1, -1, -1, -1, -1],\n",
471
- " [-1, -1, -1, -1, -1, -1, -1],\n",
472
- " [-1, -1, -1, -1, -1, -1, -1],\n",
473
- " [-1, -1, -1, -1, -1, -1, -1],\n",
474
- " [-1, -1, -1, -1, -1, -1, -1],\n",
475
- " [-1, -1, -1, -1, -1, -1, -1],\n",
476
- " [-1, -1, -1, -1, -1, -1, -1],\n",
477
- " [-1, -1, -1, -1, -1, -1, -1],\n",
478
- " [-1, -1, -1, -1, -1, -1, -1],\n",
479
- " [-1, -1, -1, -1, -1, -1, -1],\n",
480
- " [-1, -1, -1, -1, -1, -1, -1],\n",
481
- " [-1, -1, -1, -1, -1, -1, -1],\n",
482
- " [-1, -1, -1, -1, -1, -1, -1],\n",
483
- " [-1, -1, -1, -1, -1, -1, -1],\n",
484
- " [-1, -1, -1, -1, -1, -1, -1],\n",
485
- " [-1, -1, -1, -1, -1, -1, -1],\n",
486
- " [-1, -1, -1, -1, -1, -1, -1],\n",
487
- " [-1, -1, -1, -1, -1, -1, -1],\n",
488
- " [-1, -1, -1, -1, -1, -1, -1],\n",
489
- " [-1, -1, -1, -1, -1, -1, -1],\n",
490
- " [-1, -1, -1, -1, -1, -1, -1],\n",
491
- " [-1, -1, -1, -1, -1, -1, -1],\n",
492
- " [-1, -1, -1, -1, -1, -1, -1],\n",
493
- " [-1, -1, -1, -1, -1, -1, -1],\n",
494
- " [-1, -1, -1, -1, -1, -1, -1],\n",
495
- " [-1, -1, -1, -1, -1, -1, -1],\n",
496
- " [-1, -1, -1, -1, -1, -1, -1],\n",
497
- " [-1, -1, -1, -1, -1, -1, -1],\n",
498
- " [-1, -1, -1, -1, -1, -1, -1],\n",
499
- " [-1, -1, -1, -1, -1, -1, -1],\n",
500
- " [-1, -1, -1, -1, -1, -1, -1],\n",
501
- " [-1, -1, -1, -1, -1, -1, -1]])\n"
 
 
 
502
  ]
503
  }
504
  ],
@@ -508,7 +507,7 @@
508
  },
509
  {
510
  "cell_type": "code",
511
- "execution_count": 5,
512
  "id": "4f5f5dc1",
513
  "metadata": {},
514
  "outputs": [],
@@ -517,8 +516,7 @@
517
  "\n",
518
  "torch.save({\n",
519
  " \"X\": X_tensor,\n",
520
- " \"Y\": padded_Y,\n",
521
- " \"lengths\": seq_lengths\n",
522
  "}, \"DIVA_dataset.pt\")"
523
  ]
524
  },
 
336
  " EOS = torch.full((1, 7), 100, dtype=torch.long)\n",
337
  " Y_tensor.append(torch.cat([token, EOS], dim=0))\n",
338
  "\n",
 
 
339
  "# 패딩 처리\n",
340
  "padded_Y = pad_sequence(Y_tensor, batch_first=True, padding_value=-1) # (batch_size, max_len, 7)"
341
  ]
 
351
  "output_type": "stream",
352
  "text": [
353
  "X shape: torch.Size([34, 25])\n",
354
+ "Y shape: torch.Size([34, 128, 7])\n"
 
355
  ]
356
  }
357
  ],
358
  "source": [
359
  "print(\"X shape:\", X_tensor.shape)\n",
360
+ "print(\"Y shape:\", padded_Y.shape)"
 
361
  ]
362
  },
363
  {
364
  "cell_type": "code",
365
+ "execution_count": 5,
366
  "id": "b4efc676",
367
  "metadata": {},
368
  "outputs": [
 
370
  "name": "stdout",
371
  "output_type": "stream",
372
  "text": [
373
+ "Y example: tensor([[ 81, 3, 65, 1, 3, 53, 3],\n",
374
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
375
+ " [ 81, 2, 65, 1, 2, 53, 2],\n",
376
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
377
+ " [ 81, 1, 65, 1, 1, 53, 1],\n",
378
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
379
+ " [ 79, 2, 65, 1, 2, 53, 2],\n",
380
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
381
+ " [ 79, 2, 65, 1, 2, 53, 2],\n",
382
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
383
+ " [ 84, 1, 60, 8, 1, 55, 1],\n",
384
+ " [ 84, 2, 60, 8, 2, 55, 2],\n",
385
+ " [ 0, 2, 0, 1, 2, 0, 2],\n",
386
+ " [ 84, 2, 60, 8, 2, 55, 2],\n",
387
+ " [ 83, 1, 60, 8, 1, 55, 1],\n",
388
+ " [ 84, 2, 60, 8, 2, 55, 2],\n",
389
+ " [ 79, 1, 60, 8, 2, 55, 2],\n",
390
+ " [ 0, 1, 0, 1, 0, 0, 0],\n",
391
+ " [ 83, 2, 0, 1, 1, 0, 1],\n",
392
+ " [ 0, 0, 60, 8, 2, 55, 2],\n",
393
+ " [ 0, 2, 0, 1, 2, 48, 2],\n",
394
+ " [ 84, 3, 69, 5, 3, 57, 3],\n",
395
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
396
+ " [ 84, 2, 69, 5, 2, 57, 2],\n",
397
+ " [ 83, 1, 69, 5, 1, 57, 1],\n",
398
+ " [ 84, 2, 69, 5, 2, 57, 2],\n",
399
+ " [ 79, 1, 69, 5, 1, 57, 1],\n",
400
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
401
+ " [ 83, 2, 69, 5, 2, 57, 2],\n",
402
+ " [ 0, 2, 0, 1, 2, 50, 2],\n",
403
+ " [ 81, 2, 67, 1, 2, 55, 2],\n",
404
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
405
+ " [ 81, 2, 67, 1, 2, 55, 2],\n",
406
+ " [ 83, 1, 67, 1, 1, 55, 1],\n",
407
+ " [ 84, 2, 67, 1, 2, 55, 2],\n",
408
+ " [ 83, 2, 67, 1, 2, 55, 2],\n",
409
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
410
+ " [ 83, 2, 67, 1, 2, 55, 2],\n",
411
+ " [ 0, 2, 0, 1, 2, 48, 2],\n",
412
+ " [ 81, 3, 65, 1, 3, 53, 3],\n",
413
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
414
+ " [ 81, 2, 65, 1, 2, 53, 2],\n",
415
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
416
+ " [ 81, 1, 65, 1, 1, 53, 1],\n",
417
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
418
+ " [ 79, 2, 65, 1, 2, 53, 2],\n",
419
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
420
+ " [ 79, 2, 65, 1, 2, 53, 2],\n",
421
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
422
+ " [ 84, 1, 60, 8, 1, 55, 1],\n",
423
+ " [ 84, 2, 60, 8, 2, 55, 2],\n",
424
+ " [ 0, 2, 0, 1, 2, 0, 2],\n",
425
+ " [ 84, 2, 60, 8, 2, 55, 2],\n",
426
+ " [ 83, 1, 60, 8, 1, 55, 1],\n",
427
+ " [ 84, 2, 60, 8, 2, 55, 2],\n",
428
+ " [ 91, 1, 60, 8, 1, 55, 1],\n",
429
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
430
+ " [ 83, 2, 60, 8, 2, 55, 2],\n",
431
+ " [ 0, 2, 0, 1, 2, 48, 2],\n",
432
+ " [ 84, 3, 69, 5, 3, 57, 3],\n",
433
+ " [ 0, 1, 0, 1, 1, 0, 1],\n",
434
+ " [ 84, 2, 69, 5, 2, 57, 2],\n",
435
+ " [ 83, 1, 69, 5, 1, 57, 1],\n",
436
+ " [ 84, 2, 69, 5, 2, 57, 2],\n",
437
+ " [ 91, 1, 69, 5, 2, 57, 2],\n",
438
+ " [ 0, 1, 0, 1, 0, 0, 0],\n",
439
+ " [ 83, 2, 0, 1, 1, 0, 1],\n",
440
+ " [ 0, 2, 69, 5, 2, 57, 2],\n",
441
+ " [ 81, 2, 0, 1, 2, 50, 2],\n",
442
+ " [ 0, 1, 67, 1, 2, 55, 2],\n",
443
+ " [ 81, 2, 0, 1, 0, 0, 0],\n",
444
+ " [ 86, 1, 0, 1, 1, 0, 1],\n",
445
+ " [ 84, 2, 67, 1, 2, 55, 2],\n",
446
+ " [ 86, 2, 67, 1, 1, 55, 1],\n",
447
+ " [ 0, 0, 67, 1, 2, 55, 2],\n",
448
+ " [ 0, 1, 67, 1, 2, 55, 2],\n",
449
+ " [ 84, 2, 0, 1, 1, 0, 1],\n",
450
+ " [ 84, 2, 67, 1, 2, 55, 2],\n",
451
+ " [ 84, 2, 67, 1, 2, 48, 2],\n",
452
+ " [100, 100, 100, 100, 100, 100, 100],\n",
453
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
454
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
455
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
456
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
457
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
458
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
459
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
460
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
461
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
462
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
463
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
464
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
465
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
466
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
467
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
468
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
469
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
470
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
471
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
472
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
473
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
474
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
475
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
476
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
477
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
478
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
479
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
480
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
481
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
482
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
483
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
484
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
485
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
486
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
487
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
488
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
489
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
490
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
491
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
492
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
493
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
494
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
495
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
496
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
497
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
498
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
499
+ " [ -1, -1, -1, -1, -1, -1, -1],\n",
500
+ " [ -1, -1, -1, -1, -1, -1, -1]])\n"
501
  ]
502
  }
503
  ],
 
507
  },
508
  {
509
  "cell_type": "code",
510
+ "execution_count": 6,
511
  "id": "4f5f5dc1",
512
  "metadata": {},
513
  "outputs": [],
 
516
  "\n",
517
  "torch.save({\n",
518
  " \"X\": X_tensor,\n",
519
+ " \"Y\": padded_Y\n",
 
520
  "}, \"DIVA_dataset.pt\")"
521
  ]
522
  },