diff --git "a/preprocessing.ipynb" "b/preprocessing.ipynb" new file mode 100644--- /dev/null +++ "b/preprocessing.ipynb" @@ -0,0 +1,1448 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "19ba84e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [
+ "from utillity import miditokenizer, midiplayer\n",
+ "\n",
+ "# Label every MIDI file: play its score, then prompt for the three style values.\n",
+ "# Maps (Future_Bounce_like, Dark_Bright, Chill_Agressive) -> result of tokenize_midi.\n",
+ "tokenized_data = {}\n",
+ "\n",
+ "for midi, filename in miditokenizer.get_midi_scores(\"data\"):\n",
+ "    Score = midiplayer.load_score(filename)\n",
+ "    token = miditokenizer.tokenize_midi(midi)\n",
+ "\n",
+ "    midiplayer.play_score(Score) # type: ignore\n",
+ "    Future_Bounce_like = input(\"Future_Bounce_like\")\n",
+ "    Dark_Bright = input(\"Dark_Bright\")\n",
+ "    Chill_Agressive = input(\"Chill_Agressive\")\n",
+ "\n",
+ "    label = (Future_Bounce_like, Dark_Bright, Chill_Agressive)\n",
+ "    if label in tokenized_data:\n",
+ "        # The label tuple is the dict key, so a repeated label replaces the earlier file's tokens.\n",
+ "        print(f\"Warning: label {label} was already used; {filename} overwrites the earlier entry\")\n",
+ "    tokenized_data[label] = token"
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f79a6edf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tokenized MIDI data: 31\n" + ] + } + ], + "source": [ + "print(\"Tokenized MIDI data:\", len(tokenized_data))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "21a9ba89", + "metadata": {}, + "outputs": [], + "source": [
+ "import json # Save the tokenized MIDI data to a JSON file\n",
+ "\n",
+ "# JSON object keys must be strings, so tuple keys are stored as \"a|b|c\";\n",
+ "# the loading cell splits on \"|\" to rebuild the tuples.\n",
+ "# Anything that is not a dict (e.g. a list of records) is written unchanged.\n",
+ "if isinstance(tokenized_data, dict):\n",
+ "    serializable = {\n",
+ "        (key if isinstance(key, str) else \"|\".join(map(str, key))): value\n",
+ "        for key, value in tokenized_data.items()\n",
+ "    }\n",
+ "else:\n",
+ "    serializable = tokenized_data\n",
+ "\n",
+ "with open('tokenized_midi_data.json', 'w') as f:\n",
+ "    json.dump(serializable, f, indent=4)"
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dea532d4", + "metadata": {}, + "outputs": [], + "source": [
+ "import json # Load the tokenized MIDI data that was saved as JSON\n",
+ "\n",
+ "with open('tokenized_midi_data.json', 'r') as f:\n",
+ "    tokenized_data = json.load(f)\n",
+ "# Convert string keys back to tuple\n",
+ "tokenized_data = {tuple(key.split(\"|\")): value for key, value in tokenized_data.items()}\n"
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "34c244ab", + "metadata": {}, + "outputs": [], + "source": [
+ "# Find a key by its value\n",
+ "def get_key_by_value(D:dict, value):\n",
+ "    \"\"\"Return the first key in D whose value equals `value`, or None if no value matches.\"\"\"\n",
+ "    for k, v in D.items():\n",
+ "        if v == value:\n",
+ "            return k\n",
+ "    return None"
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d6ffbdaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [
+ "from utillity import miditokenizer, midiplayer\n",
+ "# Recover lost labels: reuse the labels already stored for a file's tokens and\n",
+ "# prompt only for files whose tokens are not found in the loaded data.\n",
+ "already_tokenized:dict = tokenized_data\n",
+ "recovered = []\n",
+ "\n",
+ "for midi, filename in miditokenizer.get_midi_scores(\"data\"):\n",
+ "    Score = midiplayer.load_score(filename)\n",
+ "    token = miditokenizer.tokenize_midi(midi)\n",
+ "\n",
+ "    # A single lookup: get_key_by_value returns None when the tokens are unknown.\n",
+ "    key = get_key_by_value(already_tokenized, token)\n",
+ "    if key is not None:\n",
+ "        recovered.append({\"vector\":list(key),\"token\":token})\n",
+ "        continue\n",
+ "\n",
+ "    midiplayer.play_score(Score) # type: ignore\n",
+ "    Future_Bounce_like = input(\"Future_Bounce_like\")\n",
+ "    Dark_Bright = input(\"Dark_Bright\")\n",
+ "    Chill_Agressive = input(\"Chill_Agressive\")\n",
+ "\n",
+ "    recovered.append({\"vector\":[Future_Bounce_like, Dark_Bright, Chill_Agressive],\"token\":token})\n",
+ "\n",
+ "# Rebind only after every file has been handled, so an interrupted run leaves the\n",
+ "# loaded dict in `tokenized_data` and this cell can simply be run again.\n",
+ "tokenized_data = recovered"
+ ] + }, + { + "cell_type": "markdown", + "id": "660054fb", + "metadata": {}, + "source": [
+ "## 미디 파일에 맞는 스타일 벡터 생성을 위한 곳\n",
+ "\n",
+ "### 스타일 벡터의 각 차원이 가지는 의미\n",
+ "- [Future Bounce스러움 (Bass House, J-POP 혼용 미디를 위해), 어두움–밝음, 칠–어그레시브]\n",
+ "- 순서는 라벨링 셀의 입력 프롬프트 `Future_Bounce_like`, `Dark_Bright`, `Chill_Agressive`와 같다.\n",
+ "\n",
+ "### 벡터값 기준\n",
+ "- 값은 라벨링하는 사람의 주관적 판단으로 정하되, Brooks 스타일을 기준으로 삼는다 (Brooks 곡은 항상 1, 0.5, 0.5).\n",
+ "- Future Bounce스러움은 그 곡이 Brooks의 곡과 얼마나 비슷한지를 뜻한다.\n"
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".conda", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}