{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ePWjo4hLkSZh"
   },
   "source": [
    "# Orpheus Embeddings Generator Notebook (ver. 1.0)\n",
    "\n",
    "***\n",
    "\n",
    "Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools\n",
    "\n",
    "***\n",
    "\n",
    "#### Project Los Angeles\n",
    "\n",
    "#### Tegridy Code 2026\n",
    "\n",
    "***"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "y1H5U8iiAIgD"
   },
   "source": [
    "# Setup Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "8Dt7FYceaCKF",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#@title Install all dependencies (run only once per session)\n",
    "\n",
    "# Clone into the home directory explicitly: the import cell below does\n",
    "# `%cd ~/tegridy-tools/...`, so the checkout must live under ~ no matter\n",
    "# which working directory the kernel starts in (e.g. /content on Colab).\n",
    "!git clone https://github.com/asigalov61/tegridy-tools ~/tegridy-tools\n",
    "\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install tqdm\n",
    "%pip install ipywidgets\n",
    "\n",
    "%pip install einops\n",
    "%pip install einx\n",
    "%pip install scikit-learn\n",
    "%pip install torch-summary\n",
    "\n",
    "%pip install huggingface_hub\n",
    "%pip install hf-transfer\n",
    "%pip install -U filelock"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "Lqp3urZyaDAp",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#@title Import all needed modules\n",
    "\n",
    "print('=' * 70)\n",
    "print('Loading needed modules. Please wait...')\n",
    "\n",
    "import os\n",
    "\n",
    "# Must be set before huggingface_hub is imported further below.\n",
    "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"\n",
    "\n",
    "print('=' * 70)\n",
    "print('Loading TMIDIX module...')\n",
    "\n",
    "# TMIDIX and the X-Transformer module are imported straight from the\n",
    "# tegridy-tools checkout, hence the directory changes around each import.\n",
    "%cd ~/tegridy-tools/tegridy-tools/\n",
    "\n",
    "import TMIDIX\n",
    "\n",
    "%cd ~/tegridy-tools/tegridy-tools/X-Transformer/\n",
    "\n",
    "# NOTE(review): the model classes and the get_*embeddings* helpers used in\n",
    "# later cells are presumably provided by this star import - confirm.\n",
    "from x_transformer_2_3_1 import *\n",
    "\n",
    "# All relative paths used below ('./Models/', './tegridy-tools/...') are\n",
    "# resolved against the home directory from here on.\n",
    "%cd ~\n",
    "\n",
    "import random\n",
    "\n",
    "import torch\n",
    "\n",
    "from torchsummary import summary\n",
    "from sklearn import metrics\n",
    "\n",
    "from huggingface_hub import hf_hub_download\n",
    "\n",
    "print('=' * 70)\n",
    "print('Done!')\n",
    "print('Enjoy! :)')\n",
    "print('=' * 70)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Helper Functions\n",
    "\n",
    "The decoder and encoder sections below tokenize the source MIDI in exactly the same way, so the conversion is defined once here. Run this cell before either section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def midi_to_tokens(midi_file):\n",
    "\n",
    "    \"\"\"Convert a MIDI file into the flat integer token list used by this notebook.\n",
    "\n",
    "    The sequence starts with token 18816. Every chord then contributes one\n",
    "    delta-time token followed by one (patch/pitch, duration/velocity) token\n",
    "    pair for each of its notes.\n",
    "\n",
    "    Args:\n",
    "        midi_file: Path to the source MIDI file.\n",
    "\n",
    "    Returns:\n",
    "        list of int tokens for the whole composition.\n",
    "    \"\"\"\n",
    "\n",
    "    raw_score = TMIDIX.midi2single_track_ms_score(midi_file, do_not_check_MIDI_signature=True)\n",
    "\n",
    "    processed_score = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True, apply_sustain=True)\n",
    "\n",
    "    escore_notes = TMIDIX.augment_enhanced_score_notes(processed_score[0], sort_drums_last=True)\n",
    "\n",
    "    escore_notes = TMIDIX.remove_duplicate_pitches_from_escore_notes(escore_notes)\n",
    "\n",
    "    escore_notes = TMIDIX.fix_escore_notes_durations(escore_notes, min_notes_gap=0)\n",
    "\n",
    "    dscore = TMIDIX.delta_score_notes(escore_notes)\n",
    "\n",
    "    # The first field of every delta-score note is dropped before chordifying.\n",
    "    dcscore = TMIDIX.chordify_score([d[1:] for d in dscore])\n",
    "\n",
    "    # NOTE(review): 18816 is presumably the start-of-sequence token - confirm\n",
    "    # against the model vocabulary.\n",
    "    tokens = [18816]\n",
    "\n",
    "    #=======================================================\n",
    "    # MAIN PROCESSING CYCLE\n",
    "    #=======================================================\n",
    "\n",
    "    for chord in dcscore:\n",
    "\n",
    "        # Delta time is taken from the first note of the chord.\n",
    "        # It is clamped like every other field so that it can never reach\n",
    "        # the note-token range, which starts at 256 (see the offset below).\n",
    "        # NOTE(review): TMIDIX.delta_score_notes may already clip this value;\n",
    "        # the clamp is then a no-op.\n",
    "        delta_time = max(0, min(255, chord[0][0]))\n",
    "\n",
    "        tokens.append(delta_time)\n",
    "\n",
    "        for note in chord:\n",
    "\n",
    "            # Durations\n",
    "            dur = max(1, min(255, note[1]))\n",
    "\n",
    "            # Patches\n",
    "            pat = max(0, min(128, note[5]))\n",
    "\n",
    "            # Pitches\n",
    "            ptc = max(1, min(127, note[3]))\n",
    "\n",
    "            # Velocities\n",
    "            # Calculating octo-velocity: 8..127 is mapped onto 0..7\n",
    "            vel = max(8, min(127, note[4]))\n",
    "            velocity = round(vel / 15) - 1\n",
    "\n",
    "            #=======================================================\n",
    "            # FINAL NOTE SEQ\n",
    "            #=======================================================\n",
    "\n",
    "            # Each note becomes two tokens: a combined patch/pitch token\n",
    "            # (offset 256) and a combined duration/velocity token (offset 16768).\n",
    "            pat_ptc = (128 * pat) + ptc\n",
    "            dur_vel = (8 * dur) + velocity\n",
    "\n",
    "            tokens.extend([pat_ptc + 256, dur_vel + 16768])\n",
    "\n",
    "    return tokens"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PcEkAnhyAIgL"
   },
   "source": [
    "# Orpheus Decoder Embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download desired decoder model\n",
    "\n",
    "https://huggingface.co/asigalov61/Orpheus-Music-Transformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hf_hub_download(repo_id='asigalov61/Orpheus-Music-Transformer',\n",
    "                filename='Orpheus_Music_Transformer_Large_Quality_Fine_Tuned_Model_2027_steps_1.2913_loss_0.6263_acc.pth',\n",
    "                local_dir='./Models/',\n",
    "                )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Init model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEQ_LEN = 8192\n",
    "PAD_IDX = 18819\n",
    "\n",
    "model = TransformerWrapper(\n",
    "    num_tokens = PAD_IDX+1,\n",
    "    max_seq_len = SEQ_LEN,\n",
    "    attn_layers = Decoder(dim = 2048,\n",
    "                          depth = 16,\n",
    "                          heads = 16,\n",
    "                          rotary_pos_emb = True,\n",
    "                          attn_flash = True\n",
    "                          )\n",
    "    )\n",
    "\n",
    "model = AutoregressiveWrapper(model, ignore_index = PAD_IDX, pad_value=PAD_IDX)\n",
    "\n",
    "print('=' * 70)\n",
    "print('Loading model checkpoint...')\n",
    "\n",
    "model_path = './Models/Orpheus_Music_Transformer_Large_Quality_Fine_Tuned_Model_2027_steps_1.2913_loss_0.6263_acc.pth'\n",
    "\n",
    "# The model still lives on the CPU here, so the checkpoint is mapped to the\n",
    "# CPU as well; model.cuda() below moves everything to the GPU in one step.\n",
    "model.load_state_dict(torch.load(model_path, map_location='cpu'))\n",
    "\n",
    "print('=' * 70)\n",
    "\n",
    "model.cuda()\n",
    "model.eval()\n",
    "\n",
    "model = torch.compile(model)\n",
    "\n",
    "print('Done!')\n",
    "\n",
    "summary(model)\n",
    "\n",
    "dtype = torch.bfloat16\n",
    "\n",
    "ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load source MIDI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "midi_file = './tegridy-tools/tegridy-tools/seed2.mid'\n",
    "\n",
    "print('=' * 70)\n",
    "print('Loading MIDI File:', midi_file)\n",
    "print('=' * 70)\n",
    "\n",
    "melody_chords = midi_to_tokens(midi_file)\n",
    "\n",
    "print('Done!')\n",
    "print('=' * 70)\n",
    "print('Composition has', len(melody_chords), 'tokens')\n",
    "print('=' * 70)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate single embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_seq = torch.tensor(melody_chords)\n",
    "\n",
    "emb = get_embeddings(model,\n",
    "                     input_seq,\n",
    "                     device=torch.device('cuda')\n",
    "                     )\n",
    "\n",
    "print(emb)\n",
    "print(emb.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate batched embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_batches = 4\n",
    "\n",
    "# The same composition is repeated to form a demo batch.\n",
    "input_seq = [melody_chords] * num_batches\n",
    "\n",
    "emb = get_embeddings_batched(model,\n",
    "                             input_seq\n",
    "                             )\n",
    "\n",
    "print(emb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Orpheus Encoder Embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download Orpheus encoder model\n",
    "\n",
    "https://huggingface.co/asigalov61/Orpheus-Music-Transformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hf_hub_download(repo_id='asigalov61/Orpheus-Music-Transformer',\n",
    "                filename='Orpheus_Music_Transformer_Masked_Encoder_Trained_Model_23000_steps_0.6548_loss_0.8132_acc.pth',\n",
    "                local_dir='./Models/',\n",
    "                )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Init model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEQ_LEN = 8192\n",
    "PAD_IDX = 18819\n",
    "MASK_IDX = PAD_IDX+1\n",
    "VOCAB_SIZE = MASK_IDX+1\n",
    "\n",
    "# NOTE: this rebinds `model`, replacing the decoder from the previous section.\n",
    "model = TransformerWrapper(\n",
    "    num_tokens = VOCAB_SIZE,\n",
    "    max_seq_len = SEQ_LEN,\n",
    "    attn_layers = Encoder(dim = 2048,\n",
    "                          depth = 12,\n",
    "                          heads = 16,\n",
    "                          rotary_pos_emb = True,\n",
    "                          attn_flash = True\n",
    "                          )\n",
    ")\n",
    "\n",
    "print('=' * 70)\n",
    "print('Loading model checkpoint...')\n",
    "\n",
    "model_path = './Models/Orpheus_Music_Transformer_Masked_Encoder_Trained_Model_23000_steps_0.6548_loss_0.8132_acc.pth'\n",
    "\n",
    "# The model still lives on the CPU here, so the checkpoint is mapped to the\n",
    "# CPU as well; model.cuda() below moves everything to the GPU in one step.\n",
    "model.load_state_dict(torch.load(model_path, map_location='cpu'))\n",
    "\n",
    "print('=' * 70)\n",
    "\n",
    "model.cuda()\n",
    "model.eval()\n",
    "\n",
    "model = torch.compile(model)\n",
    "\n",
    "print('Done!')\n",
    "\n",
    "summary(model)\n",
    "\n",
    "dtype = torch.bfloat16\n",
    "\n",
    "ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load source MIDI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "midi_file = './tegridy-tools/tegridy-tools/seed2.mid'\n",
    "\n",
    "print('=' * 70)\n",
    "print('Loading MIDI File:', midi_file)\n",
    "print('=' * 70)\n",
    "\n",
    "melody_chords = midi_to_tokens(midi_file)\n",
    "\n",
    "print('Done!')\n",
    "print('=' * 70)\n",
    "print('Composition has', len(melody_chords), 'tokens')\n",
    "print('=' * 70)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_batches = 4\n",
    "\n",
    "# The same composition is repeated to form a demo batch.\n",
    "input_seq = [melody_chords] * num_batches\n",
    "\n",
    "emb = get_enc_embeddings(model,\n",
    "                         input_seq,\n",
    "                         device=torch.device('cuda')\n",
    "                         )\n",
    "\n",
    "print(emb)\n",
    "print(emb.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Congrats! You did it! :)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}