# %% [markdown]
# ## Building the style vector for each MIDI file

# %%
from typing import Optional

from music21 import note, chord
from music21.stream.base import Score


def _safe_ratio(numerator, denominator) -> float:
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return float(numerator) / float(denominator) if denominator else 0.0


def score_to_style_vector(score: Score, mood: Optional[str] = None, bpm: int = 128) -> dict:
    """Analyse a music21 score and build its style-vector dictionary.

    Args:
        score: The music21 score to analyse.
        mood: Mood label to store. When ``None`` (the default) the score is
            played with ``score.show('midi')`` and the label is read from the
            user with ``input()``.
        bpm: Value stored under ``"bpm"``; it is not derived from the score.

    Returns:
        A dict with the keys ``bpm``, ``key``, ``mode``, ``chord_complexity``,
        ``melody_density``, ``syncopation``, ``pitch_range`` and ``mood``.
        The three ratio features are rounded to 3 decimals.
    """
    ask_user = mood is None
    if ask_user:
        # Play the piece first so the annotator can listen before labelling it.
        score.show('midi')

    # 1. Key & mode
    key_obj = score.analyze("key")
    key = key_obj.tonic.name  # type: ignore
    mode = key_obj.mode  # type: ignore

    # Flatten once and reuse; ``flatten()`` replaces the deprecated ``.flat``.
    flat_score = score.flatten()

    # 2. Melody: single notes only (chords are handled separately below)
    melody_notes = [n for n in flat_score.notes if isinstance(n, note.Note)]
    pitches = [n.pitch.midi for n in melody_notes]
    onsets = [n.offset for n in melody_notes]

    # 3. Harmony: share of chords that contain more than three pitches
    chords = list(flat_score.getElementsByClass(chord.Chord))
    complex_chords = sum(1 for c in chords if len(c.pitches) > 3)
    chord_complexity = _safe_ratio(complex_chords, len(chords))

    # 4. Melody density: notes per unit of score time (0.0 for an empty score)
    melody_density = _safe_ratio(len(melody_notes), score.highestTime)  # type: ignore

    # 5. Syncopation: share of note onsets that are not on a whole-number offset
    off_beat_onsets = sum(1 for o in onsets if (o % 1) != 0)
    syncopation = _safe_ratio(off_beat_onsets, len(onsets))

    # 6. Pitch range in MIDI numbers (0 when there are no melody notes)
    pitch_range = max(pitches) - min(pitches) if pitches else 0

    # 7. Mood as perceived by a human listener
    if ask_user:
        mood = input("분위기 입력 (이 중에서 선택: Happy, Chill, Emotional, Aggressive, Dreamy, Melodic): ")
    # Stray whitespace would otherwise create a separate category downstream.
    mood = mood.strip()

    # Resulting style vector
    style_vector = {
        "bpm": bpm,
        "key": key,
        "mode": mode,
        "chord_complexity": round(chord_complexity, 3),
        "melody_density": round(melody_density, 3),
        "syncopation": round(syncopation, 3),
        "pitch_range": pitch_range,
        "mood": mood
    }

    return style_vector


# %%
from HarmonyMIDIToken import HarmonyMIDIToken as Tokenizer
import os
import json
import os

# Make the cell runnable on a fresh kernel: reuse the in-memory list when it
# already exists, otherwise resume from the JSON file written further down,
# otherwise start empty.
try:
    tokenized_data
except NameError:
    if os.path.exists('tokenized_midi_data.json'):
        with open('tokenized_midi_data.json', 'r', encoding='utf-8') as f:
            tokenized_data = json.load(f)
    else:
        tokenized_data = []

# Names already processed, kept as a set so the membership test is O(1)
# instead of rebuilding a list for every file. ``.get`` tolerates entries
# that have no "name" key.
done_names = {entry.get("name") for entry in tokenized_data}

# ``os.listdir`` returns entries in arbitrary order; sort for reproducible runs.
for filename in sorted(os.listdir("data")):
    if not filename.endswith(".mid") or filename in done_names:
        print(f"Skipping non-MIDI or already Done file: {filename}")
        continue

    MIDI = Tokenizer()
    print(f"file name: {filename}")
    MIDI.set_midi(os.path.join("data", filename))

    # Plays the piece and asks for a mood label (interactive).
    vector = score_to_style_vector(MIDI.to_midi())  # type: ignore

    tokenized_data.append({
        "name": filename,
        "vector": vector,
        "token": MIDI.token_id
    })
    done_names.add(filename)

# %%
print("Tokenized MIDI data:", len(tokenized_data))

# %%
# Save the tokenised MIDI data as JSON
with open('tokenized_midi_data.json', 'w', encoding='utf-8') as f:
    json.dump(tokenized_data, f, indent=4)

# %%
# Load the tokenised MIDI data back from JSON
with open('tokenized_midi_data.json', 'r', encoding='utf-8') as f:
    tokenized_data = json.load(f)

# %% [markdown]
# ## Data corrections
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
# %%
from HarmonyMIDIToken import HarmonyMIDIToken as Tokenizer
import os

# Re-label every entry whose mood was recorded as "IDK": rebuild the piece
# from its tokens, play it, and ask for a new label.
for item in tokenized_data:
    if item['vector']['mood'] == "IDK":
        MIDI = Tokenizer()

        MIDI.set_id(item['token'])
        MIDI.to_midi().show('midi')
        item['vector']['mood'] = input("분위기 재 입력")

# %%
# Repair entries that have no "name" key by moving the value stored under
# "vector" to "name".
# NOTE(review): a repaired entry no longer has a "vector" key, yet the
# preprocessing below reads item['vector'] for every entry - confirm.
for item in tokenized_data:
    if "name" not in item:
        print(item)
        item['name'] = item.pop('vector')

# %%
# Re-tokenise every entry from its source file in "data".
for item in tokenized_data:
    filename = item['name']

    try:
        # Loading the file is inside the ``try`` as well, so that one broken
        # file is reported and skipped instead of aborting the whole loop.
        MIDI = Tokenizer()
        MIDI.set_midi(os.path.join("data", filename))
        item['token'] = MIDI.token_id
    except Exception as error:  # not a bare except: Ctrl-C must still interrupt
        print(f"{item['name']} 을 토크나이즈 하는데 실패함.")
        print(f"    {error!r}")

# %% [markdown]
# ## Data preprocessing

# %%
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from torch.nn.utils.rnn import pad_sequence
import torch
import pandas as pd

# One row per piece, one column per style-vector field.
vector_df = pd.DataFrame([item['vector'] for item in tokenized_data])

# Preprocessing pipeline: one-hot encode the categorical fields and min-max
# scale the numeric ones.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(sparse_output=False), ["mode", "mood", "key"]),
    ("num", MinMaxScaler(), ["bpm", "chord_complexity", "melody_density", "syncopation", "pitch_range"])
])

X = preprocessor.fit_transform(vector_df)

# Convert to a tensor
X_tensor = torch.tensor(X, dtype=torch.float32)
# End-of-sequence row appended to every token sequence. Built once here
# instead of on every loop iteration.
EOS = torch.tensor([[100, 15, 72, 14, 15, 58, 15]], dtype=torch.long)

# Fill value used to pad shorter sequences.
# NOTE(review): 16 might also be a legitimate value in some token columns -
# confirm it cannot be confused with real data.
PAD_VALUE = 16

Y_tensor = []
for item in tokenized_data:
    token = torch.tensor(item['token'], dtype=torch.long)
    if token.numel() == 0:
        # An empty token list becomes a 1-D tensor, which cannot be
        # concatenated with the 2-D EOS row; give it zero rows of EOS width.
        token = token.reshape(0, EOS.shape[1])
    Y_tensor.append(torch.cat([token, EOS], dim=0))

# Pad every sequence to the longest one -> (batch_size, max_len, 7)
padded_Y = pad_sequence(Y_tensor, batch_first=True, padding_value=PAD_VALUE)

# %%
print("X shape:", X_tensor.shape)
print("Y shape:", padded_Y.shape)
" [ 84, 2, 67, 1, 2, 55, 2],\n", " [ 83, 2, 67, 1, 2, 55, 2],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 83, 2, 67, 1, 2, 55, 2],\n", " [ 0, 2, 0, 1, 2, 48, 2],\n", " [ 81, 3, 65, 1, 3, 53, 3],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 81, 2, 65, 1, 2, 53, 2],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 81, 1, 65, 1, 1, 53, 1],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 79, 2, 65, 1, 2, 53, 2],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 79, 2, 65, 1, 2, 53, 2],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 84, 1, 60, 8, 1, 55, 1],\n", " [ 84, 2, 60, 8, 2, 55, 2],\n", " [ 0, 2, 0, 1, 2, 0, 2],\n", " [ 84, 2, 60, 8, 2, 55, 2],\n", " [ 83, 1, 60, 8, 1, 55, 1],\n", " [ 84, 2, 60, 8, 2, 55, 2],\n", " [ 91, 1, 60, 8, 1, 55, 1],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 83, 2, 60, 8, 2, 55, 2],\n", " [ 0, 2, 0, 1, 2, 48, 2],\n", " [ 84, 3, 69, 5, 3, 57, 3],\n", " [ 0, 1, 0, 1, 1, 0, 1],\n", " [ 84, 2, 69, 5, 2, 57, 2],\n", " [ 83, 1, 69, 5, 1, 57, 1],\n", " [ 84, 2, 69, 5, 2, 57, 2],\n", " [ 91, 1, 69, 5, 2, 57, 2],\n", " [ 0, 1, 0, 1, 0, 0, 0],\n", " [ 83, 2, 0, 1, 1, 0, 1],\n", " [ 0, 2, 69, 5, 2, 57, 2],\n", " [ 81, 2, 0, 1, 2, 50, 2],\n", " [ 0, 1, 67, 1, 2, 55, 2],\n", " [ 81, 2, 0, 1, 0, 0, 0],\n", " [ 86, 1, 0, 1, 1, 0, 1],\n", " [ 84, 2, 67, 1, 2, 55, 2],\n", " [ 86, 2, 67, 1, 1, 55, 1],\n", " [ 0, 0, 67, 1, 2, 55, 2],\n", " [ 0, 1, 67, 1, 2, 55, 2],\n", " [ 84, 2, 0, 1, 1, 0, 1],\n", " [ 84, 2, 67, 1, 2, 55, 2],\n", " [ 84, 2, 67, 1, 2, 48, 2],\n", " [100, 15, 72, 14, 15, 58, 15],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 16, 16, 16, 16, 16, 16, 16],\n", " [ 
# %%
# Show the first padded target sequence (tokens, then the EOS row, then padding).
print("Y example:", padded_Y[0])

# %%
import torch

# Persist the features and the padded targets together in a single file.
torch.save(obj=dict(X=X_tensor, Y=padded_Y), f="DIVA_dataset.pt")

# %% [markdown]
# ## Preprocessing done!