Spaces:
Sleeping
Sleeping
Commit
·
572abf8
1
Parent(s):
753bd5a
Initial commit
Browse files- Dockerfile +31 -0
- README.md +4 -4
- __pycache__/utils.cpython-311.pyc +0 -0
- agents/__innit__.py +0 -0
- agents/__pycache__/agents.cpython-311.pyc +0 -0
- agents/__pycache__/harmonize.cpython-311.pyc +0 -0
- agents/__pycache__/harmonize.cpython-312.pyc +0 -0
- agents/__pycache__/utils.cpython-311.pyc +0 -0
- agents/agents.py +422 -0
- agents/utils.py +15 -0
- anticipation/__init__.py +9 -0
- anticipation/__pycache__/__init__.cpython-311.pyc +0 -0
- anticipation/__pycache__/config.cpython-311.pyc +0 -0
- anticipation/__pycache__/convert.cpython-311.pyc +0 -0
- anticipation/__pycache__/ops.cpython-311.pyc +0 -0
- anticipation/__pycache__/sample.cpython-311.pyc +0 -0
- anticipation/__pycache__/tokenize.cpython-311.pyc +0 -0
- anticipation/__pycache__/visuals.cpython-311.pyc +0 -0
- anticipation/__pycache__/vocab.cpython-311.pyc +0 -0
- anticipation/config-original.py +60 -0
- anticipation/config.py +60 -0
- anticipation/convert-original.py +342 -0
- anticipation/convert.py +365 -0
- anticipation/ops.py +285 -0
- anticipation/sample.py +280 -0
- anticipation/tokenize.py +219 -0
- anticipation/visuals.py +65 -0
- anticipation/vocab.py +58 -0
- api.py +240 -0
- examples/full-score3.mid +0 -0
- examples/strawberry.mid +0 -0
- requirements.txt +11 -0
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9-slim
|
| 2 |
+
|
| 3 |
+
# Create dedicated user with home directory
|
| 4 |
+
RUN useradd -m -u 1000 user
|
| 5 |
+
|
| 6 |
+
# Set Hugging Face cache to user's writable directory
|
| 7 |
+
ENV HF_HOME=/home/user/.cache/huggingface
|
| 8 |
+
ENV TRANSFORMERS_CACHE=/home/user/.cache/huggingface
|
| 9 |
+
|
| 10 |
+
# Create cache directory with proper permissions
|
| 11 |
+
RUN mkdir -p ${HF_HOME} && chown -R user:user /home/user
|
| 12 |
+
|
| 13 |
+
# Set working directory (app will live here)
|
| 14 |
+
WORKDIR /app
|
| 15 |
+
|
| 16 |
+
# Install dependencies as root
|
| 17 |
+
COPY requirements.txt .
|
| 18 |
+
RUN pip install --no-cache-dir -r requirements.txt gunicorn
|
| 19 |
+
|
| 20 |
+
# Copy app files (maintain ownership)
|
| 21 |
+
COPY --chown=user:user . .
|
| 22 |
+
|
| 23 |
+
RUN rm -rf /root/.cache/pip
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Switch to non-root user
|
| 27 |
+
USER user
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
EXPOSE 7860
|
| 31 |
+
CMD ["gunicorn", "--workers", "1", "--timeout", "120", "--bind", "0.0.0.0:7860", "api:app"]
|
README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
---
|
|
|
|
| 1 |
---
|
| 2 |
+
title: InScoreAI
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
---
|
__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (229 Bytes). View file
|
|
|
agents/__innit__.py
ADDED
|
File without changes
|
agents/__pycache__/agents.cpython-311.pyc
ADDED
|
Binary file (18.1 kB). View file
|
|
|
agents/__pycache__/harmonize.cpython-311.pyc
ADDED
|
Binary file (19.2 kB). View file
|
|
|
agents/__pycache__/harmonize.cpython-312.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
agents/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (691 Bytes). View file
|
|
|
agents/agents.py
ADDED
|
@@ -0,0 +1,422 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from anticipation import ops
|
| 2 |
+
from anticipation.sample import generate
|
| 3 |
+
from anticipation.tokenize import extract_instruments
|
| 4 |
+
from anticipation.convert import events_to_midi,midi_to_events, compound_to_midi
|
| 5 |
+
from anticipation.config import *
|
| 6 |
+
from anticipation.vocab import *
|
| 7 |
+
from anticipation.convert import midi_to_compound
|
| 8 |
+
import mido
|
| 9 |
+
from agents.utils import load_midi_metadata
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
SMALL_MODEL = 'stanford-crfm/music-small-800k' # faster inference, worse sample quality
|
| 13 |
+
MEDIUM_MODEL = 'stanford-crfm/music-medium-800k' # slower inference, better sample quality
|
| 14 |
+
LARGE_MODEL = 'stanford-crfm/music-large-800k' # slowest inference, best sample quality
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def harmonize_midi(model, midi, start_time, end_time,original_tempo,original_time_sig,top_p):
|
| 20 |
+
|
| 21 |
+
# Turn full midi to events
|
| 22 |
+
events = midi_to_events(midi)
|
| 23 |
+
|
| 24 |
+
print("Midi converted to events")
|
| 25 |
+
|
| 26 |
+
# Get clip from 0 to end of full midi
|
| 27 |
+
|
| 28 |
+
segment = ops.clip(events, 0, ops.max_time(events, seconds=True))
|
| 29 |
+
segment = ops.translate(segment, -ops.min_time(segment, seconds=False))
|
| 30 |
+
|
| 31 |
+
# Extract melody and accompaniment
|
| 32 |
+
events, melody = extract_instruments(segment, [0])
|
| 33 |
+
|
| 34 |
+
print("Melody extracted")
|
| 35 |
+
|
| 36 |
+
print("Start time:", start_time)
|
| 37 |
+
print("End time:", end_time)
|
| 38 |
+
|
| 39 |
+
# Get initial prompt
|
| 40 |
+
history = ops.clip(events, 0, start_time, clip_duration=False)
|
| 41 |
+
|
| 42 |
+
anticipated = [CONTROL_OFFSET + tok for tok in ops.clip(events, end_time, ops.max_time(segment, seconds=True), clip_duration=False)]
|
| 43 |
+
|
| 44 |
+
# Generate accompaniment conditioning on melody
|
| 45 |
+
accompaniment = generate(model, start_time, end_time, inputs=history, controls=melody, top_p=top_p, debug=False)
|
| 46 |
+
|
| 47 |
+
# Append anticipated continuation to accompaniment
|
| 48 |
+
accompaniment = ops.combine(accompaniment, anticipated)
|
| 49 |
+
|
| 50 |
+
print("Accompaniment generated")
|
| 51 |
+
|
| 52 |
+
# 1) render each voice separately
|
| 53 |
+
mel_mid = events_to_midi(melody)
|
| 54 |
+
acc_mid = events_to_midi(accompaniment)
|
| 55 |
+
|
| 56 |
+
# 2) build a fresh MidiFile
|
| 57 |
+
combined = mido.MidiFile()
|
| 58 |
+
combined.ticks_per_beat = mel_mid.ticks_per_beat # or TIME_RESOLUTION//2
|
| 59 |
+
|
| 60 |
+
print("Midi built")
|
| 61 |
+
|
| 62 |
+
# 3) meta‐track with tempo & time signature
|
| 63 |
+
meta = mido.MidiTrack()
|
| 64 |
+
meta.append(mido.MetaMessage('set_tempo', tempo=original_tempo))
|
| 65 |
+
meta.append(mido.MetaMessage('time_signature',
|
| 66 |
+
numerator=original_time_sig[0],
|
| 67 |
+
denominator=original_time_sig[1]))
|
| 68 |
+
combined.tracks.append(meta)
|
| 69 |
+
|
| 70 |
+
# 4) append melody *then* accompaniment
|
| 71 |
+
combined.tracks.extend(mel_mid.tracks[1:]) # Skip existing meta track
|
| 72 |
+
combined.tracks.extend(acc_mid.tracks[1:])
|
| 73 |
+
# 5) save in exactly that order
|
| 74 |
+
|
| 75 |
+
for track in combined.tracks:
|
| 76 |
+
for msg in track:
|
| 77 |
+
if msg.type in ['note_on', 'note_off']:
|
| 78 |
+
# Ensure valid MIDI values
|
| 79 |
+
if hasattr(msg, 'velocity'):
|
| 80 |
+
msg.velocity = min(max(msg.velocity, 0), 127)
|
| 81 |
+
if hasattr(msg, 'note'):
|
| 82 |
+
msg.note = min(max(msg.note, 0), 127)
|
| 83 |
+
|
| 84 |
+
print(f"Melody tracks: {len(mel_mid.tracks)}")
|
| 85 |
+
print(f"Accompaniment tracks: {len(acc_mid.tracks)}")
|
| 86 |
+
print(f"Combined tracks before cleanup: {len(combined.tracks)}")
|
| 87 |
+
|
| 88 |
+
# Add track cleanup (keep only unique tracks):
|
| 89 |
+
unique_tracks = []
|
| 90 |
+
seen = set()
|
| 91 |
+
for track in combined.tracks:
|
| 92 |
+
track_hash = str([msg.hex() for msg in track])
|
| 93 |
+
if track_hash not in seen:
|
| 94 |
+
unique_tracks.append(track)
|
| 95 |
+
seen.add(track_hash)
|
| 96 |
+
combined.tracks = unique_tracks
|
| 97 |
+
|
| 98 |
+
print(f"Final track count: {len(combined.tracks)}")
|
| 99 |
+
|
| 100 |
+
print("Output Midi metadata added")
|
| 101 |
+
|
| 102 |
+
return combined
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def harmonizer(ai_model,midi_file, start_time, end_time,top_p):
|
| 107 |
+
"""
|
| 108 |
+
this function harmonizes a melody in a MIDI file
|
| 109 |
+
returns the harmonized MIDI
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
midi_file: path to the MIDI file
|
| 113 |
+
start_time: start time of the selected measure (melody you want to harmonize) in milliseconds
|
| 114 |
+
end_time: end time of the selected measure in milliseconds
|
| 115 |
+
"""
|
| 116 |
+
|
| 117 |
+
print(f"Original MIDI tracks: {len(midi_file.tracks)}")
|
| 118 |
+
|
| 119 |
+
# Load metadata and model...
|
| 120 |
+
|
| 121 |
+
# Log original note parameters
|
| 122 |
+
for track in midi_file.tracks:
|
| 123 |
+
for msg in track:
|
| 124 |
+
if msg.type in ['note_on', 'note_off']:
|
| 125 |
+
if msg.velocity > 127 or msg.velocity < 0:
|
| 126 |
+
print(f"Invalid velocity: {msg.velocity}")
|
| 127 |
+
if msg.note > 127 or msg.note < 0:
|
| 128 |
+
print(f"Invalid pitch: {msg.note}")
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# Load original MIDI and extract metadata
|
| 132 |
+
midi, original_tempo, original_time_sig = load_midi_metadata(midi_file)
|
| 133 |
+
|
| 134 |
+
print("Midi metadata loaded")
|
| 135 |
+
|
| 136 |
+
# load an anticipatory music transformer
|
| 137 |
+
model = ai_model # add .cuda() if you have a GPU
|
| 138 |
+
|
| 139 |
+
print("Model loaded")
|
| 140 |
+
|
| 141 |
+
harmonized_midi = harmonize_midi(model, midi, start_time, end_time, original_tempo,original_time_sig,top_p)
|
| 142 |
+
|
| 143 |
+
print("Midi generated")
|
| 144 |
+
|
| 145 |
+
print(f"Harmonized MIDI tracks: {len(harmonized_midi.tracks)}")
|
| 146 |
+
|
| 147 |
+
# Add MIDI validation
|
| 148 |
+
for track in harmonized_midi.tracks:
|
| 149 |
+
for msg in track:
|
| 150 |
+
if msg.type in ['note_on', 'note_off']:
|
| 151 |
+
# Clamp invalid values
|
| 152 |
+
msg.velocity = min(max(msg.velocity, 0), 127)
|
| 153 |
+
msg.note = min(max(msg.note, 0), 127)
|
| 154 |
+
|
| 155 |
+
print("Midi saved")
|
| 156 |
+
|
| 157 |
+
return harmonized_midi
|
| 158 |
+
|
| 159 |
+
def infill_midi(model, midi, start_time, end_time,original_tempo,original_time_sig,top_p):
|
| 160 |
+
|
| 161 |
+
# Turn full midi to events
|
| 162 |
+
events = midi_to_events(midi)
|
| 163 |
+
|
| 164 |
+
print("Midi converted to events")
|
| 165 |
+
|
| 166 |
+
# Get clip from 0 to end of full midi
|
| 167 |
+
|
| 168 |
+
segment = ops.clip(events, 0, ops.max_time(events, seconds=True))
|
| 169 |
+
segment = ops.translate(segment, -ops.min_time(segment, seconds=False))
|
| 170 |
+
|
| 171 |
+
# Get initial prompt
|
| 172 |
+
history = ops.clip(events, 0, start_time, clip_duration=False)
|
| 173 |
+
|
| 174 |
+
anticipated = [CONTROL_OFFSET + tok for tok in ops.clip(events, end_time, ops.max_time(segment, seconds=True), clip_duration=False)]
|
| 175 |
+
|
| 176 |
+
# Generate accompaniment conditioning on melody
|
| 177 |
+
infilling = generate(model, start_time, end_time, inputs=history, controls=anticipated, top_p=top_p, debug=False)
|
| 178 |
+
|
| 179 |
+
# Append anticipated continuation to accompaniment
|
| 180 |
+
full_events = ops.combine(infilling, anticipated)
|
| 181 |
+
|
| 182 |
+
print("Accompaniment generated")
|
| 183 |
+
|
| 184 |
+
# 1) render each voice separately
|
| 185 |
+
full_mid = events_to_midi(full_events)
|
| 186 |
+
|
| 187 |
+
# 2) build a fresh MidiFile
|
| 188 |
+
combined = mido.MidiFile()
|
| 189 |
+
combined.ticks_per_beat = full_mid.ticks_per_beat # or TIME_RESOLUTION//2
|
| 190 |
+
|
| 191 |
+
print("Midi built")
|
| 192 |
+
|
| 193 |
+
# 3) meta‐track with tempo & time signature
|
| 194 |
+
meta = mido.MidiTrack()
|
| 195 |
+
meta.append(mido.MetaMessage('set_tempo', tempo=original_tempo))
|
| 196 |
+
meta.append(mido.MetaMessage('time_signature',
|
| 197 |
+
numerator=original_time_sig[0],
|
| 198 |
+
denominator=original_time_sig[1]))
|
| 199 |
+
combined.tracks.append(meta)
|
| 200 |
+
|
| 201 |
+
# 4) append melody *then* accompaniment
|
| 202 |
+
combined.tracks.extend(full_mid.tracks[:]) # Skip existing meta track
|
| 203 |
+
|
| 204 |
+
# 5) save in exactly that order
|
| 205 |
+
|
| 206 |
+
for track in combined.tracks:
|
| 207 |
+
for msg in track:
|
| 208 |
+
if msg.type in ['note_on', 'note_off']:
|
| 209 |
+
# Ensure valid MIDI values
|
| 210 |
+
if hasattr(msg, 'velocity'):
|
| 211 |
+
msg.velocity = min(max(msg.velocity, 0), 127)
|
| 212 |
+
if hasattr(msg, 'note'):
|
| 213 |
+
msg.note = min(max(msg.note, 0), 127)
|
| 214 |
+
|
| 215 |
+
print(f"Melody tracks: {len(full_mid.tracks)}")
|
| 216 |
+
print(f"Accompaniment tracks: {len(full_mid.tracks)}")
|
| 217 |
+
print(f"Combined tracks before cleanup: {len(combined.tracks)}")
|
| 218 |
+
|
| 219 |
+
# Add track cleanup (keep only unique tracks):
|
| 220 |
+
unique_tracks = []
|
| 221 |
+
seen = set()
|
| 222 |
+
for track in combined.tracks:
|
| 223 |
+
track_hash = str([msg.hex() for msg in track])
|
| 224 |
+
if track_hash not in seen:
|
| 225 |
+
unique_tracks.append(track)
|
| 226 |
+
seen.add(track_hash)
|
| 227 |
+
combined.tracks = unique_tracks
|
| 228 |
+
|
| 229 |
+
print(f"Final track count: {len(combined.tracks)}")
|
| 230 |
+
|
| 231 |
+
print("Output Midi metadata added")
|
| 232 |
+
|
| 233 |
+
return combined
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def infiller(ai_model,midi_file, start_time, end_time,top_p):
|
| 238 |
+
"""
|
| 239 |
+
this function harmonizes a melody in a MIDI file
|
| 240 |
+
returns the harmonized MIDI
|
| 241 |
+
|
| 242 |
+
Args:
|
| 243 |
+
midi_file: path to the MIDI file
|
| 244 |
+
start_time: start time of the selected measure (melody you want to harmonize) in milliseconds
|
| 245 |
+
end_time: end time of the selected measure in milliseconds
|
| 246 |
+
"""
|
| 247 |
+
|
| 248 |
+
print(f"Original MIDI tracks: {len(midi_file.tracks)}")
|
| 249 |
+
|
| 250 |
+
# Load metadata and model...
|
| 251 |
+
|
| 252 |
+
# Log original note parameters
|
| 253 |
+
for track in midi_file.tracks:
|
| 254 |
+
for msg in track:
|
| 255 |
+
if msg.type in ['note_on', 'note_off']:
|
| 256 |
+
if msg.velocity > 127 or msg.velocity < 0:
|
| 257 |
+
print(f"Invalid velocity: {msg.velocity}")
|
| 258 |
+
if msg.note > 127 or msg.note < 0:
|
| 259 |
+
print(f"Invalid pitch: {msg.note}")
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
# Load original MIDI and extract metadata
|
| 263 |
+
midi, original_tempo, original_time_sig = load_midi_metadata(midi_file)
|
| 264 |
+
|
| 265 |
+
print("Midi metadata loaded")
|
| 266 |
+
|
| 267 |
+
# load an anticipatory music transformer
|
| 268 |
+
model = ai_model # add .cuda() if you have a GPU
|
| 269 |
+
|
| 270 |
+
print("Model loaded")
|
| 271 |
+
|
| 272 |
+
infilled_midi = infill_midi(model, midi, start_time, end_time, original_tempo,original_time_sig,top_p)
|
| 273 |
+
|
| 274 |
+
print("Midi generated")
|
| 275 |
+
|
| 276 |
+
print(f"Harmonized MIDI tracks: {len(infilled_midi.tracks)}")
|
| 277 |
+
|
| 278 |
+
# Add MIDI validation
|
| 279 |
+
for track in infilled_midi.tracks:
|
| 280 |
+
for msg in track:
|
| 281 |
+
if msg.type in ['note_on', 'note_off']:
|
| 282 |
+
# Clamp invalid values
|
| 283 |
+
msg.velocity = min(max(msg.velocity, 0), 127)
|
| 284 |
+
msg.note = min(max(msg.note, 0), 127)
|
| 285 |
+
|
| 286 |
+
print("Midi saved")
|
| 287 |
+
|
| 288 |
+
return infilled_midi
|
| 289 |
+
|
| 290 |
+
def change_melody_midi(model, midi, start_time, end_time,original_tempo,original_time_sig,top_p):
|
| 291 |
+
|
| 292 |
+
events = midi_to_events(midi)
|
| 293 |
+
segment = ops.clip(events, 0, ops.max_time(events, seconds=True))
|
| 294 |
+
segment = ops.translate(segment, -ops.min_time(segment, seconds=False))
|
| 295 |
+
|
| 296 |
+
# Extract melody (instrument 0) as events and accompaniment as controls
|
| 297 |
+
instruments = list(ops.get_instruments(segment).keys())
|
| 298 |
+
accompaniment_instruments = [instr for instr in instruments if instr != 0]
|
| 299 |
+
melody_events, accompaniment_controls = extract_instruments(segment, accompaniment_instruments)
|
| 300 |
+
|
| 301 |
+
# Get initial prompt (melody before start_time)
|
| 302 |
+
history = ops.clip(melody_events, 0, start_time, clip_duration=False)
|
| 303 |
+
|
| 304 |
+
# Include accompaniment controls for the entire duration
|
| 305 |
+
controls = accompaniment_controls # Full accompaniment as controls
|
| 306 |
+
|
| 307 |
+
# Generate new melody conditioned on accompaniment
|
| 308 |
+
infilling = generate(model, start_time, end_time, inputs=history, controls=controls, top_p=top_p, debug=False)
|
| 309 |
+
|
| 310 |
+
# Append anticipated continuation
|
| 311 |
+
anticipated_melody = [CONTROL_OFFSET + tok for tok in ops.clip(melody_events, end_time, ops.max_time(segment, seconds=True), clip_duration=False)]
|
| 312 |
+
full_events = ops.combine(infilling, anticipated_melody)
|
| 313 |
+
|
| 314 |
+
acc_mid = events_to_midi(accompaniment_controls)
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
# Render and combine MIDI tracks
|
| 318 |
+
full_mid = events_to_midi(full_events)
|
| 319 |
+
combined = mido.MidiFile()
|
| 320 |
+
combined.ticks_per_beat = full_mid.ticks_per_beat # or TIME_RESOLUTION//2
|
| 321 |
+
|
| 322 |
+
print("Midi built")
|
| 323 |
+
|
| 324 |
+
# 3) meta‐track with tempo & time signature
|
| 325 |
+
meta = mido.MidiTrack()
|
| 326 |
+
meta.append(mido.MetaMessage('set_tempo', tempo=original_tempo))
|
| 327 |
+
meta.append(mido.MetaMessage('time_signature',
|
| 328 |
+
numerator=original_time_sig[0],
|
| 329 |
+
denominator=original_time_sig[1]))
|
| 330 |
+
combined.tracks.append(meta)
|
| 331 |
+
|
| 332 |
+
# 4) append melody *then* accompaniment
|
| 333 |
+
combined.tracks.extend(full_mid.tracks[:]) # Skip existing meta track
|
| 334 |
+
combined.tracks.extend(acc_mid.tracks[:]) # Skip existing meta track
|
| 335 |
+
|
| 336 |
+
# 5) save in exactly that order
|
| 337 |
+
|
| 338 |
+
for track in combined.tracks:
|
| 339 |
+
for msg in track:
|
| 340 |
+
if msg.type in ['note_on', 'note_off']:
|
| 341 |
+
# Ensure valid MIDI values
|
| 342 |
+
if hasattr(msg, 'velocity'):
|
| 343 |
+
msg.velocity = min(max(msg.velocity, 0), 127)
|
| 344 |
+
if hasattr(msg, 'note'):
|
| 345 |
+
msg.note = min(max(msg.note, 0), 127)
|
| 346 |
+
|
| 347 |
+
print(f"Melody tracks: {len(full_mid.tracks)}")
|
| 348 |
+
print(f"Accompaniment tracks: {len(full_mid.tracks)}")
|
| 349 |
+
print(f"Combined tracks before cleanup: {len(combined.tracks)}")
|
| 350 |
+
|
| 351 |
+
# Add track cleanup (keep only unique tracks):
|
| 352 |
+
unique_tracks = []
|
| 353 |
+
seen = set()
|
| 354 |
+
for track in combined.tracks:
|
| 355 |
+
track_hash = str([msg.hex() for msg in track])
|
| 356 |
+
if track_hash not in seen:
|
| 357 |
+
unique_tracks.append(track)
|
| 358 |
+
seen.add(track_hash)
|
| 359 |
+
combined.tracks = unique_tracks
|
| 360 |
+
|
| 361 |
+
print(f"Final track count: {len(combined.tracks)}")
|
| 362 |
+
|
| 363 |
+
print("Output Midi metadata added")
|
| 364 |
+
|
| 365 |
+
return combined
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def change_melody(ai_model,midi_file, start_time, end_time,top_p):
|
| 370 |
+
"""
|
| 371 |
+
this function harmonizes a melody in a MIDI file
|
| 372 |
+
returns the harmonized MIDI
|
| 373 |
+
|
| 374 |
+
Args:
|
| 375 |
+
midi_file: path to the MIDI file
|
| 376 |
+
start_time: start time of the selected measure (melody you want to harmonize) in milliseconds
|
| 377 |
+
end_time: end time of the selected measure in milliseconds
|
| 378 |
+
"""
|
| 379 |
+
|
| 380 |
+
print(f"Original MIDI tracks: {len(midi_file.tracks)}")
|
| 381 |
+
|
| 382 |
+
# Load metadata and model...
|
| 383 |
+
|
| 384 |
+
# Log original note parameters
|
| 385 |
+
for track in midi_file.tracks:
|
| 386 |
+
for msg in track:
|
| 387 |
+
if msg.type in ['note_on', 'note_off']:
|
| 388 |
+
if msg.velocity > 127 or msg.velocity < 0:
|
| 389 |
+
print(f"Invalid velocity: {msg.velocity}")
|
| 390 |
+
if msg.note > 127 or msg.note < 0:
|
| 391 |
+
print(f"Invalid pitch: {msg.note}")
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
# Load original MIDI and extract metadata
|
| 395 |
+
midi, original_tempo, original_time_sig = load_midi_metadata(midi_file)
|
| 396 |
+
|
| 397 |
+
print("Midi metadata loaded")
|
| 398 |
+
|
| 399 |
+
# load an anticipatory music transformer
|
| 400 |
+
model = ai_model # add .cuda() if you have a GPU
|
| 401 |
+
|
| 402 |
+
print("Model loaded")
|
| 403 |
+
|
| 404 |
+
change_melody_gen_midi = change_melody_midi(model, midi, start_time, end_time, original_tempo,original_time_sig,top_p)
|
| 405 |
+
|
| 406 |
+
print("Midi generated")
|
| 407 |
+
|
| 408 |
+
print(f"Harmonized MIDI tracks: {len(change_melody_gen_midi.tracks)}")
|
| 409 |
+
|
| 410 |
+
# Add MIDI validation
|
| 411 |
+
for track in change_melody_gen_midi.tracks:
|
| 412 |
+
for msg in track:
|
| 413 |
+
if msg.type in ['note_on', 'note_off']:
|
| 414 |
+
# Clamp invalid values
|
| 415 |
+
msg.velocity = min(max(msg.velocity, 0), 127)
|
| 416 |
+
msg.note = min(max(msg.note, 0), 127)
|
| 417 |
+
|
| 418 |
+
print("Midi saved")
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
return change_melody_gen_midi
|
agents/utils.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
def load_midi_metadata(midi_file):
|
| 4 |
+
|
| 5 |
+
original_tempo = 500000 # default tempo (120 BPM)
|
| 6 |
+
original_time_sig = (4, 4) # default time signature
|
| 7 |
+
|
| 8 |
+
for msg in midi_file:
|
| 9 |
+
if msg.type == 'set_tempo':
|
| 10 |
+
original_tempo = msg.tempo
|
| 11 |
+
elif msg.type == 'time_signature':
|
| 12 |
+
original_time_sig = (msg.numerator, msg.denominator)
|
| 13 |
+
|
| 14 |
+
return midi_file, original_tempo, original_time_sig
|
| 15 |
+
|
anticipation/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Infrastructure for constructing anticipatory infilling models.
|
| 2 |
+
|
| 3 |
+
This model provides infrastructure to preprocess Midi music datasets
|
| 4 |
+
for training anticipatory music infilling models. For more context, see:
|
| 5 |
+
|
| 6 |
+
Anticipatory Music Transformer
|
| 7 |
+
John Thickstun, David Hall, Chris Donahue, Percy Liang
|
| 8 |
+
Preprint Report, 2023
|
| 9 |
+
"""
|
anticipation/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (598 Bytes). View file
|
|
|
anticipation/__pycache__/config.cpython-311.pyc
ADDED
|
Binary file (2.08 kB). View file
|
|
|
anticipation/__pycache__/convert.cpython-311.pyc
ADDED
|
Binary file (19 kB). View file
|
|
|
anticipation/__pycache__/ops.cpython-311.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
anticipation/__pycache__/sample.cpython-311.pyc
ADDED
|
Binary file (13.6 kB). View file
|
|
|
anticipation/__pycache__/tokenize.cpython-311.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
anticipation/__pycache__/visuals.cpython-311.pyc
ADDED
|
Binary file (4.02 kB). View file
|
|
|
anticipation/__pycache__/vocab.cpython-311.pyc
ADDED
|
Binary file (2.62 kB). View file
|
|
|
anticipation/config-original.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Global configuration for anticipatory infilling models.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# model hyper-parameters
|
| 6 |
+
|
| 7 |
+
CONTEXT_SIZE = 1024 # model context
|
| 8 |
+
EVENT_SIZE = 3 # each event/control is encoded as 3 tokens
|
| 9 |
+
M = 341 # model context (1024 = 1 + EVENT_SIZE*M)
|
| 10 |
+
DELTA = 5 # anticipation time in seconds
|
| 11 |
+
|
| 12 |
+
assert CONTEXT_SIZE == 1+EVENT_SIZE*M
|
| 13 |
+
|
| 14 |
+
# vocabulary constants
|
| 15 |
+
|
| 16 |
+
MAX_TIME_IN_SECONDS = 100 # exclude very long training sequences
|
| 17 |
+
MAX_DURATION_IN_SECONDS = 10 # maximum duration of a note
|
| 18 |
+
TIME_RESOLUTION = 100 # 10ms time resolution = 100 bins/second
|
| 19 |
+
|
| 20 |
+
MAX_PITCH = 128 # 128 MIDI pitches
|
| 21 |
+
MAX_INSTR = 129 # 129 MIDI instruments (128 + drums)
|
| 22 |
+
MAX_NOTE = MAX_PITCH*MAX_INSTR # note = pitch x instrument
|
| 23 |
+
|
| 24 |
+
MAX_INTERARRIVAL_IN_SECONDS = 10 # maximum interarrival time (for MIDI-like encoding)
|
| 25 |
+
|
| 26 |
+
# preprocessing settings
|
| 27 |
+
|
| 28 |
+
PREPROC_WORKERS = 16
|
| 29 |
+
|
| 30 |
+
COMPOUND_SIZE = 5 # event size in the intermediate compound tokenization
|
| 31 |
+
MAX_TRACK_INSTR = 16 # exclude tracks with large numbers of instruments
|
| 32 |
+
MAX_TRACK_TIME_IN_SECONDS = 3600 # exclude very long tracks (longer than 1 hour)
|
| 33 |
+
MIN_TRACK_TIME_IN_SECONDS = 10 # exclude very short tracks (less than 10 seconds)
|
| 34 |
+
MIN_TRACK_EVENTS = 100 # exclude very short tracks (less than 100 events)
|
| 35 |
+
|
| 36 |
+
# LakhMIDI dataset splits
|
| 37 |
+
|
| 38 |
+
LAKH_SPLITS = ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f']
|
| 39 |
+
LAKH_VALID = ['e']
|
| 40 |
+
LAKH_TEST = ['f']
|
| 41 |
+
|
| 42 |
+
# derived quantities
|
| 43 |
+
|
| 44 |
+
MAX_TIME = TIME_RESOLUTION*MAX_TIME_IN_SECONDS
|
| 45 |
+
MAX_DUR = TIME_RESOLUTION*MAX_DURATION_IN_SECONDS
|
| 46 |
+
|
| 47 |
+
MAX_INTERARRIVAL = TIME_RESOLUTION*MAX_INTERARRIVAL_IN_SECONDS
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
if __name__ == '__main__':
|
| 51 |
+
print('Model constants:')
|
| 52 |
+
print(f' -> anticipation interval: {DELTA}s')
|
| 53 |
+
print('Vocabulary constants:')
|
| 54 |
+
print(f' -> maximum time of a sequence: {MAX_TIME_IN_SECONDS}s')
|
| 55 |
+
print(f' -> maximum duration of a note: {MAX_DURATION_IN_SECONDS}s')
|
| 56 |
+
print(f' -> time resolution: {TIME_RESOLUTION}bins/s ({1000//TIME_RESOLUTION}ms)')
|
| 57 |
+
print(f' -> maximum interarrival-time (MIDI-like encoding): {MAX_INTERARRIVAL_IN_SECONDS}s')
|
| 58 |
+
print('Preprocessing constants:')
|
| 59 |
+
print(f' -> maximum time of a track: {MAX_TRACK_TIME_IN_SECONDS}s')
|
| 60 |
+
print(f' -> minimum events in a track: {MIN_TRACK_EVENTS}s')
|
anticipation/config.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Global configuration for anticipatory infilling models.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# model hyper-parameters
|
| 6 |
+
|
| 7 |
+
CONTEXT_SIZE = 1024 # model context
|
| 8 |
+
EVENT_SIZE = 3 # each event/control is encoded as 3 tokens
|
| 9 |
+
M = 341 # model context (1024 = 1 + EVENT_SIZE*M)
|
| 10 |
+
DELTA = 5 # anticipation time in seconds
|
| 11 |
+
|
| 12 |
+
assert CONTEXT_SIZE == 1+EVENT_SIZE*M
|
| 13 |
+
|
| 14 |
+
# vocabulary constants
|
| 15 |
+
|
| 16 |
+
MAX_TIME_IN_SECONDS = 100 # exclude very long training sequences
|
| 17 |
+
MAX_DURATION_IN_SECONDS = 10 # maximum duration of a note
|
| 18 |
+
TIME_RESOLUTION = 100 # 10ms time resolution = 100 bins/second
|
| 19 |
+
|
| 20 |
+
MAX_PITCH = 128 # 128 MIDI pitches
|
| 21 |
+
MAX_INSTR = 129 # 129 MIDI instruments (128 + drums)
|
| 22 |
+
MAX_NOTE = MAX_PITCH*MAX_INSTR # note = pitch x instrument
|
| 23 |
+
|
| 24 |
+
MAX_INTERARRIVAL_IN_SECONDS = 10 # maximum interarrival time (for MIDI-like encoding)
|
| 25 |
+
|
| 26 |
+
# preprocessing settings
|
| 27 |
+
|
| 28 |
+
PREPROC_WORKERS = 16
|
| 29 |
+
|
| 30 |
+
COMPOUND_SIZE = 5 # event size in the intermediate compound tokenization
|
| 31 |
+
MAX_TRACK_INSTR = 16 # exclude tracks with large numbers of instruments
|
| 32 |
+
MAX_TRACK_TIME_IN_SECONDS = 3600 # exclude very long tracks (longer than 1 hour)
|
| 33 |
+
MIN_TRACK_TIME_IN_SECONDS = 10 # exclude very short tracks (less than 10 seconds)
|
| 34 |
+
MIN_TRACK_EVENTS = 100 # exclude very short tracks (less than 100 events)
|
| 35 |
+
|
| 36 |
+
# LakhMIDI dataset splits
|
| 37 |
+
|
| 38 |
+
LAKH_SPLITS = ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f']
|
| 39 |
+
LAKH_VALID = ['e']
|
| 40 |
+
LAKH_TEST = ['f']
|
| 41 |
+
|
| 42 |
+
# derived quantities
|
| 43 |
+
|
| 44 |
+
MAX_TIME = TIME_RESOLUTION*MAX_TIME_IN_SECONDS
|
| 45 |
+
MAX_DUR = TIME_RESOLUTION*MAX_DURATION_IN_SECONDS
|
| 46 |
+
|
| 47 |
+
MAX_INTERARRIVAL = TIME_RESOLUTION*MAX_INTERARRIVAL_IN_SECONDS
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
if __name__ == '__main__':
|
| 51 |
+
print('Model constants:')
|
| 52 |
+
print(f' -> anticipation interval: {DELTA}s')
|
| 53 |
+
print('Vocabulary constants:')
|
| 54 |
+
print(f' -> maximum time of a sequence: {MAX_TIME_IN_SECONDS}s')
|
| 55 |
+
print(f' -> maximum duration of a note: {MAX_DURATION_IN_SECONDS}s')
|
| 56 |
+
print(f' -> time resolution: {TIME_RESOLUTION}bins/s ({1000//TIME_RESOLUTION}ms)')
|
| 57 |
+
print(f' -> maximum interarrival-time (MIDI-like encoding): {MAX_INTERARRIVAL_IN_SECONDS}s')
|
| 58 |
+
print('Preprocessing constants:')
|
| 59 |
+
print(f' -> maximum time of a track: {MAX_TRACK_TIME_IN_SECONDS}s')
|
| 60 |
+
print(f' -> minimum events in a track: {MIN_TRACK_EVENTS}s')
|
anticipation/convert-original.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utilities for converting to and from Midi data and encoded/tokenized data.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
|
| 7 |
+
import mido
|
| 8 |
+
|
| 9 |
+
from anticipation.config import *
|
| 10 |
+
from anticipation.vocab import *
|
| 11 |
+
from anticipation.ops import unpad
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def midi_to_interarrival(midifile, debug=False, stats=False):
    """Encode a MIDI file as a MIDI-like interarrival-time token stream.

    Arguments:
        midifile: path to a MIDI file (anything mido.MidiFile accepts).
        debug: print a diagnostic for message types we don't recognize.
        stats: additionally return how many interarrival times were clipped.

    Returns the token list, or (tokens, truncations) when stats is True.
    """
    # message types that carry no information for this encoding
    ignored = {
        'time_signature', 'aftertouch', 'polytouch', 'pitchwheel',
        'sequencer_specific', 'control_change', 'track_name', 'text',
        'end_of_track', 'lyrics', 'key_signature', 'copyright', 'marker',
        'instrument_name', 'cue_marker', 'device_name', 'sequence_number',
        'channel_prefix', 'midi_port', 'smpte_offset', 'sysex',
    }

    midi = mido.MidiFile(midifile)

    tokens = []
    elapsed = 0  # seconds since the previous note event

    program = defaultdict(int)  # channel -> program; default 0 = piano
    tempo = 500000              # default tempo: 500000 microseconds per beat
    truncations = 0
    for message in midi:
        elapsed += message.time

        # sanity check: negative time?
        if message.time < 0:
            raise ValueError

        if message.type == 'program_change':
            program[message.channel] = message.program
        elif message.type in ('note_on', 'note_off'):
            ticks = round(TIME_RESOLUTION * elapsed)
            delta_ticks = min(ticks, MAX_INTERARRIVAL - 1)  # clip long gaps
            if delta_ticks != ticks:
                truncations += 1

            if delta_ticks > 0:  # time elapsed since last token
                tokens.append(MIDI_TIME_OFFSET + delta_ticks)

            # special case: channel 9 is drums!
            inst = 128 if message.channel == 9 else program[message.channel]
            is_onset = message.type == 'note_on' and message.velocity > 0
            offset = MIDI_START_OFFSET if is_onset else MIDI_END_OFFSET
            tokens.append(offset + (2**7)*inst + message.note)
            elapsed = 0
        elif message.type == 'set_tempo':
            tempo = message.tempo  # we work in real time; kept for parity
        elif message.type in ignored:
            pass  # metadata / expression data we don't attempt to model
        elif debug:
            print('UNHANDLED MESSAGE', message.type, message)

    if stats:
        return tokens, truncations

    return tokens
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def interarrival_to_midi(tokens, debug=False):
    """Decode an interarrival-time token stream into a mido.MidiFile.

    One track is created per instrument.  Drums (instrument code 128) are
    routed to MIDI channel 9; other instruments claim channels in order of
    first appearance, skipping channel 9.
    """
    mid = mido.MidiFile()
    mid.ticks_per_beat = TIME_RESOLUTION // 2  # 2 beats/second at quarter=120

    channels = {}  # instrument -> (track, time of last event, channel)
    now = 0        # absolute time in ticks
    num_tracks = 0

    def new_track(instrument):
        # build a fresh track (with its program_change) for this instrument
        nonlocal num_tracks
        channel = 9 if instrument == 128 else num_tracks  # drums -> channel 9
        track = mido.MidiTrack()
        mid.tracks.append(track)
        track.append(mido.Message('program_change', channel=channel,
                                  program=0 if instrument == 128 else instrument))
        num_tracks += 1
        if num_tracks == 9:
            num_tracks += 1  # skip the drums channel when assigning
        return track, 0, channel

    for token in tokens:
        if token == MIDI_SEPARATOR:
            continue

        if token < MIDI_START_OFFSET:
            now += token - MIDI_TIME_OFFSET  # a time-shift token
        elif token < MIDI_END_OFFSET:
            code = token - MIDI_START_OFFSET
            instrument, pitch = code // 2**7, code % 2**7
            try:
                track, last, channel = channels[instrument]
            except KeyError:
                track, last, channel = new_track(instrument)
            track.append(mido.Message('note_on', note=pitch, channel=channel,
                                      velocity=96, time=now-last))
            channels[instrument] = (track, now, channel)
        else:
            code = token - MIDI_END_OFFSET
            instrument, pitch = code // 2**7, code % 2**7
            try:
                track, last, channel = channels[instrument]
            except KeyError:
                # shouldn't happen: every offset should follow its onset
                if debug:
                    print('IGNORING bad offset')
                continue
            track.append(mido.Message('note_off', note=pitch, channel=channel,
                                      time=now-last))
            channels[instrument] = (track, now, channel)

    return mid
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def midi_to_compound(midifile, debug=False):
    """Tokenize a MIDI file into flat (time, duration, note, instr, velocity)
    "compound" words, one 5-token group per note.

    Times and durations are quantized to TIME_RESOLUTION ticks per second.
    Notes whose offset never arrives keep the placeholder duration -1.
    """
    # message types that carry no information for this encoding
    ignored = {
        'time_signature', 'aftertouch', 'polytouch', 'pitchwheel',
        'sequencer_specific', 'control_change', 'track_name', 'text',
        'end_of_track', 'lyrics', 'key_signature', 'copyright', 'marker',
        'instrument_name', 'cue_marker', 'device_name', 'sequence_number',
        'channel_prefix', 'midi_port', 'smpte_offset', 'sysex',
    }

    midi = mido.MidiFile(midifile) if isinstance(midifile, str) else midifile

    tokens = []
    note_count = 0
    pending = defaultdict(list)  # (instr, note, channel) -> queue of open onsets

    now = 0
    program = defaultdict(int)  # channel -> program; default 0 = piano
    tempo = 500000              # default tempo: 500000 microseconds per beat
    for message in midi:
        now += message.time

        # sanity check: negative time?
        if message.time < 0:
            raise ValueError

        if message.type == 'program_change':
            program[message.channel] = message.program
        elif message.type in ('note_on', 'note_off'):
            # special case: channel 9 is drums!
            instr = 128 if message.channel == 9 else program[message.channel]

            if message.type == 'note_on' and message.velocity > 0:  # onset
                tokens.extend([
                    round(TIME_RESOLUTION*now),  # quantized onset time
                    -1,                          # duration placeholder
                    message.note,
                    instr,
                    message.velocity,
                ])
                pending[(instr, message.note, message.channel)].append((note_count, now))
                note_count += 1
            else:  # offset: resolve the oldest matching open note
                try:
                    open_idx, onset_time = pending[(instr, message.note, message.channel)].pop(0)
                except IndexError:
                    if debug:
                        print('WARNING: ignoring bad offset')
                else:
                    tokens[5*open_idx + 1] = round(TIME_RESOLUTION*(now-onset_time))
        elif message.type == 'set_tempo':
            tempo = message.tempo  # we work in real time; kept for parity
        elif message.type in ignored:
            pass  # metadata / expression data we don't attempt to model
        elif debug:
            print('UNHANDLED MESSAGE', message.type, message)

    unclosed_count = sum(len(queue) for queue in pending.values())
    if debug and unclosed_count > 0:
        print(f'WARNING: {unclosed_count} unclosed notes')
        print(' ', midifile)

    return tokens
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def compound_to_midi(tokens, debug=False):
    """Render a compound token list (5 tokens per note) into a mido.MidiFile.

    One track is created per instrument; drums (instrument 128) are routed
    to MIDI channel 9.  Events are written in time order with delta times.
    """
    mid = mido.MidiFile()
    mid.ticks_per_beat = TIME_RESOLUTION // 2  # 2 beats/second at quarter=120

    # index every onset (event type 0) and offset (1) by absolute tick time
    it = iter(tokens)
    time_index = defaultdict(list)
    for start, duration, note, instrument, velocity in zip(it, it, it, it, it):
        time_index[(start, 0)].append((note, instrument, velocity))
        time_index[(start+duration, 1)].append((note, instrument, velocity))

    state = {}  # instrument -> (track, time of last event, channel)
    num_tracks = 0

    def new_track(instrument):
        # build a fresh track (with its program_change) for this instrument
        nonlocal num_tracks
        channel = 9 if instrument == 128 else num_tracks  # drums -> channel 9
        track = mido.MidiTrack()
        mid.tracks.append(track)
        track.append(mido.Message('program_change', channel=channel,
                                  program=0 if instrument == 128 else instrument))
        num_tracks += 1
        if num_tracks == 9:
            num_tracks += 1  # skip the drums channel when assigning
        return track, 0, channel

    for time_in_ticks, event_type in sorted(time_index.keys()):
        for note, instrument, velocity in time_index[(time_in_ticks, event_type)]:
            if event_type == 0:  # onset
                try:
                    track, last, channel = state[instrument]
                except KeyError:
                    track, last, channel = new_track(instrument)
                track.append(mido.Message('note_on', note=note, channel=channel,
                                          velocity=velocity, time=time_in_ticks-last))
            else:  # offset
                try:
                    track, last, channel = state[instrument]
                except KeyError:
                    # shouldn't happen: every offset should follow its onset
                    if debug:
                        print('IGNORING bad offset')
                    continue
                track.append(mido.Message('note_off', note=note, channel=channel,
                                          time=time_in_ticks-last))
            state[instrument] = (track, time_in_ticks, channel)

    return mid
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def compound_to_events(tokens, stats=False):
    """Convert compound words (time, dur, note, instr, velocity) into the
    3-token event vocabulary (time, dur, note), applying type offsets.

    Velocities are dropped; pitch/instrument pairs are fused into a single
    note code; durations are clipped to MAX_DUR and unknown durations (-1)
    become 250ms.  When stats is True, also return the clip count.
    """
    assert len(tokens) % 5 == 0
    compound = tokens.copy()

    # remove velocities
    del compound[4::5]

    # fuse (pitch, instrument) into a single note code
    assert all(-1 <= tok < 2**7 for tok in compound[2::4])
    assert all(-1 <= tok < 129 for tok in compound[3::4])
    compound[2::4] = [
        NOTE_OFFSET + (SEPARATOR if pitch == -1 else MAX_PITCH*instr + pitch)
        for pitch, instr in zip(compound[2::4], compound[3::4])
    ]
    del compound[3::4]

    # max duration cutoff and set unknown durations to 250ms
    truncations = sum(1 for tok in compound[1::3] if tok >= MAX_DUR)
    compound[1::3] = [
        DUR_OFFSET + (TIME_RESOLUTION//4 if tok == -1 else min(tok, MAX_DUR-1))
        for tok in compound[1::3]
    ]

    assert min(compound[0::3]) >= 0
    compound[0::3] = [TIME_OFFSET + tok for tok in compound[0::3]]

    assert len(compound) % 3 == 0

    if stats:
        return compound, truncations

    return compound
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def events_to_compound(tokens, debug=False):
    """Expand event-vocabulary tokens back into 5-token compound words.

    Control-offset tokens are shifted into the event range, type offsets
    are stripped, tracks separated by SEPARATOR are laid out sequentially
    in time, and a default velocity of 72 is attached to every note.
    """
    events = unpad(tokens)

    # move all tokens to zero-offset for synthesis
    events = [t if t == SEPARATOR or t < CONTROL_OFFSET else t - CONTROL_OFFSET
              for t in events]

    # strip the per-type vocabulary offsets
    events[0::3] = [t if t == SEPARATOR else t - TIME_OFFSET for t in events[0::3]]
    events[1::3] = [t if t == SEPARATOR else t - DUR_OFFSET for t in events[1::3]]
    events[2::3] = [t if t == SEPARATOR else t - NOTE_OFFSET for t in events[2::3]]

    # shift each track past the end of the previous one for synthesis
    offset = 0
    track_max = 0  # latest note end seen in the current track
    for j, (start, dur, note) in enumerate(zip(events[0::3], events[1::3], events[2::3])):
        if note == SEPARATOR:
            offset += track_max
            track_max = 0
            if debug:
                print('Sequence Boundary')
        else:
            track_max = max(track_max, start + dur)
            events[3*j] += offset

    # strip sequence separators
    assert len([t for t in events if t == SEPARATOR]) % 3 == 0
    events = [t for t in events if t != SEPARATOR]

    assert len(events) % 3 == 0
    count = len(events) // 3
    out = [0] * (5 * count)
    out[0::5] = events[0::3]
    out[1::5] = events[1::3]
    out[2::5] = [t % 2**7 for t in events[2::3]]   # pitch
    out[3::5] = [t // 2**7 for t in events[2::3]]  # instrument
    out[4::5] = count * [72]                       # default velocity

    assert max(out[1::5]) < MAX_DUR
    assert max(out[2::5]) < MAX_PITCH
    assert max(out[3::5]) < MAX_INSTR
    assert all(t >= 0 for t in out)

    return out
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def events_to_midi(tokens, debug=False):
    """Convert event-vocabulary tokens to a mido.MidiFile (via compound)."""
    compound = events_to_compound(tokens, debug=debug)
    return compound_to_midi(compound, debug=debug)
|
| 340 |
+
|
| 341 |
+
def midi_to_events(midifile, debug=False):
    """Tokenize a MIDI file into the event vocabulary (via compound)."""
    compound = midi_to_compound(midifile, debug=debug)
    return compound_to_events(compound)
|
anticipation/convert.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utilities for converting to and from Midi data and encoded/tokenized data.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
|
| 7 |
+
import mido
|
| 8 |
+
|
| 9 |
+
from anticipation.config import *
|
| 10 |
+
from anticipation.vocab import *
|
| 11 |
+
from anticipation.ops import unpad
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def midi_to_interarrival(midifile, debug=False, stats=False):
    """Encode a MIDI file as a MIDI-like interarrival-time token stream.

    Arguments:
        midifile: path to a MIDI file (anything mido.MidiFile accepts).
        debug: print a diagnostic for message types we don't recognize.
        stats: additionally return how many interarrival times were clipped.

    Returns the token list, or (tokens, truncations) when stats is True.
    """
    # message types that carry no information for this encoding
    ignored = {
        'time_signature', 'aftertouch', 'polytouch', 'pitchwheel',
        'sequencer_specific', 'control_change', 'track_name', 'text',
        'end_of_track', 'lyrics', 'key_signature', 'copyright', 'marker',
        'instrument_name', 'cue_marker', 'device_name', 'sequence_number',
        'channel_prefix', 'midi_port', 'smpte_offset', 'sysex',
    }

    midi = mido.MidiFile(midifile)

    tokens = []
    elapsed = 0  # seconds since the previous note event

    program = defaultdict(int)  # channel -> program; default 0 = piano
    tempo = 500000              # default tempo: 500000 microseconds per beat
    truncations = 0
    for message in midi:
        elapsed += message.time

        # sanity check: negative time?
        if message.time < 0:
            raise ValueError

        if message.type == 'program_change':
            program[message.channel] = message.program
        elif message.type in ('note_on', 'note_off'):
            ticks = round(TIME_RESOLUTION * elapsed)
            delta_ticks = min(ticks, MAX_INTERARRIVAL - 1)  # clip long gaps
            if delta_ticks != ticks:
                truncations += 1

            if delta_ticks > 0:  # time elapsed since last token
                tokens.append(MIDI_TIME_OFFSET + delta_ticks)

            # special case: channel 9 is drums!
            inst = 128 if message.channel == 9 else program[message.channel]
            is_onset = message.type == 'note_on' and message.velocity > 0
            offset = MIDI_START_OFFSET if is_onset else MIDI_END_OFFSET
            tokens.append(offset + (2**7)*inst + message.note)
            elapsed = 0
        elif message.type == 'set_tempo':
            tempo = message.tempo  # we work in real time; kept for parity
        elif message.type in ignored:
            pass  # metadata / expression data we don't attempt to model
        elif debug:
            print('UNHANDLED MESSAGE', message.type, message)

    if stats:
        return tokens, truncations

    return tokens
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def interarrival_to_midi(tokens, debug=False):
    """Decode an interarrival-time token stream into a mido.MidiFile.

    One track is created per instrument.  Drums (instrument code 128) are
    routed to MIDI channel 9; other instruments claim channels in order of
    first appearance, skipping channel 9.
    """
    mid = mido.MidiFile()
    mid.ticks_per_beat = TIME_RESOLUTION // 2  # 2 beats/second at quarter=120

    channels = {}  # instrument -> (track, time of last event, channel)
    now = 0        # absolute time in ticks
    num_tracks = 0

    def new_track(instrument):
        # build a fresh track (with its program_change) for this instrument
        nonlocal num_tracks
        channel = 9 if instrument == 128 else num_tracks  # drums -> channel 9
        track = mido.MidiTrack()
        mid.tracks.append(track)
        track.append(mido.Message('program_change', channel=channel,
                                  program=0 if instrument == 128 else instrument))
        num_tracks += 1
        if num_tracks == 9:
            num_tracks += 1  # skip the drums channel when assigning
        return track, 0, channel

    for token in tokens:
        if token == MIDI_SEPARATOR:
            continue

        if token < MIDI_START_OFFSET:
            now += token - MIDI_TIME_OFFSET  # a time-shift token
        elif token < MIDI_END_OFFSET:
            code = token - MIDI_START_OFFSET
            instrument, pitch = code // 2**7, code % 2**7
            try:
                track, last, channel = channels[instrument]
            except KeyError:
                track, last, channel = new_track(instrument)
            track.append(mido.Message('note_on', note=pitch, channel=channel,
                                      velocity=96, time=now-last))
            channels[instrument] = (track, now, channel)
        else:
            code = token - MIDI_END_OFFSET
            instrument, pitch = code // 2**7, code % 2**7
            try:
                track, last, channel = channels[instrument]
            except KeyError:
                # shouldn't happen: every offset should follow its onset
                if debug:
                    print('IGNORING bad offset')
                continue
            track.append(mido.Message('note_off', note=pitch, channel=channel,
                                      time=now-last))
            channels[instrument] = (track, now, channel)

    return mid
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def midi_to_compound(midifile, debug=False):
    """Tokenize a MIDI file into flat (time, duration, note, instr, velocity)
    "compound" words, one 5-token group per note.

    The instr field packs the GM program and a remapped output channel as
    (program << 4) | channel so the channel assignment survives a round
    trip through compound_to_midi.  Drums (input channel 9) are encoded as
    program 128 on channel 9.  Notes whose offset never arrives keep the
    placeholder duration -1.

    Fixes over the previous version:
      * notes on a channel that never saw a program_change no longer crash
        on a None output channel (channels are now claimed lazily);
      * repeated program_change messages on a channel now update the program
        instead of being silently ignored;
      * output channel assignment wraps within the valid 0-15 range
        (still skipping the drum channel 9).
    """
    # message types that carry no information for this encoding
    ignored = {
        'time_signature', 'aftertouch', 'polytouch', 'pitchwheel',
        'sequencer_specific', 'control_change', 'track_name', 'text',
        'end_of_track', 'lyrics', 'key_signature', 'copyright', 'marker',
        'instrument_name', 'cue_marker', 'device_name', 'sequence_number',
        'channel_prefix', 'midi_port', 'smpte_offset', 'sysex',
    }

    midi = mido.MidiFile(midifile) if isinstance(midifile, str) else midifile

    tokens = []
    note_idx = 0
    open_notes = defaultdict(list)  # (instr, note, channel) -> open onsets

    # map each *input* channel to its current program and output channel
    instruments = defaultdict(lambda: {'program': 0, 'channel': None})
    next_channel = 0

    def claim(in_channel):
        # lazily assign an output channel the first time an input channel
        # appears (program_change OR a bare note event)
        nonlocal next_channel
        entry = instruments[in_channel]
        if entry['channel'] is None:
            entry['channel'] = next_channel
            next_channel += 1
            if next_channel == 9:   # never hand out the drum channel
                next_channel += 1
            if next_channel > 15:   # MIDI only has channels 0-15: wrap
                next_channel = 0
        return entry

    time = 0
    tempo = 500000  # default tempo: 500000 microseconds per beat
    for message in midi:
        time += message.time

        # sanity check: negative time?
        if message.time < 0:
            raise ValueError

        if message.type == 'program_change':
            if message.channel != 9:  # channel 9 is always drums
                claim(message.channel)['program'] = message.program
        elif message.type in ('note_on', 'note_off'):
            # special case: channel 9 is drums!
            if message.channel == 9:
                instr, channel = 128, 9
            else:
                entry = claim(message.channel)
                instr, channel = entry['program'], entry['channel']
            compound_instr = (instr << 4) | channel

            if message.type == 'note_on' and message.velocity > 0:  # onset
                # Our compound word is: (time, duration, note, instr, velocity)
                tokens.append(round(TIME_RESOLUTION*time))  # quantized onset
                tokens.append(-1)  # duration placeholder, filled at the offset
                tokens.append(message.note)
                tokens.append(compound_instr)
                tokens.append(message.velocity)

                open_notes[(instr, message.note, message.channel)].append((note_idx, time))
                note_idx += 1
            else:  # offset: resolve the oldest matching open note
                try:
                    open_idx, onset_time = open_notes[(instr, message.note, message.channel)].pop(0)
                except IndexError:
                    if debug:
                        print('WARNING: ignoring bad offset')
                else:
                    tokens[5*open_idx + 1] = round(TIME_RESOLUTION*(time-onset_time))
        elif message.type == 'set_tempo':
            tempo = message.tempo  # we work in real time; kept for parity
        elif message.type in ignored:
            pass  # metadata / expression data we don't attempt to model
        elif debug:
            print('UNHANDLED MESSAGE', message.type, message)

    unclosed_count = sum(len(v) for v in open_notes.values())
    if debug and unclosed_count > 0:
        print(f'WARNING: {unclosed_count} unclosed notes')
        print(' ', midifile)

    return tokens
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def compound_to_midi(tokens, debug=False):
    """Render a compound token list (5 tokens per note) into a mido.MidiFile.

    The instr field of each word packs (program << 4) | channel as produced
    by this module's midi_to_compound; one track is created per distinct
    packed value and events are written in time order with delta times.
    Drums decode to channel 9 (the packed program bit 128 is masked off,
    yielding GM program 0 on the drum channel).

    Fixes over the previous version:
      * removed the loop that iterated EVERY flattened token (times,
        durations, velocities, ...) as if it were a packed instrument code,
        creating garbage tracks and program_change messages;
      * the instrument field is now decoded with the packed scheme instead
        of the legacy raw-program scheme, so tracks and channels match what
        midi_to_compound encoded.
    """
    mid = mido.MidiFile()
    mid.ticks_per_beat = TIME_RESOLUTION // 2  # 2 beats/second at quarter=120

    # index every onset (event type 0) and offset (1) by absolute tick time
    it = iter(tokens)
    time_index = defaultdict(list)
    for time_in_ticks, duration, note, instrument, velocity in zip(it, it, it, it, it):
        time_index[(time_in_ticks, 0)].append((note, instrument, velocity))
        time_index[(time_in_ticks+duration, 1)].append((note, instrument, velocity))

    track_state = {}  # packed instrument -> (track, time of last event, channel)

    def get_track(instrument):
        # fetch (or create on first use) the track for this packed value
        try:
            return track_state[instrument]
        except KeyError:
            program = (instrument >> 4) & 0x7F  # drums (instr 128) -> program 0
            channel = instrument & 0x0F
            track = mido.MidiTrack()
            mid.tracks.append(track)
            track.append(mido.Message('program_change', channel=channel,
                                      program=program))
            state = (track, 0, channel)
            track_state[instrument] = state
            return state

    for time_in_ticks, event_type in sorted(time_index.keys()):
        for note, instrument, velocity in time_index[(time_in_ticks, event_type)]:
            if event_type == 0:  # onset
                track, previous_time, channel = get_track(instrument)
                track.append(mido.Message(
                    'note_on', note=note, channel=channel, velocity=velocity,
                    time=time_in_ticks-previous_time))
            else:  # offset
                try:
                    track, previous_time, channel = track_state[instrument]
                except KeyError:
                    # shouldn't happen: every offset should follow its onset
                    if debug:
                        print('IGNORING bad offset')
                    continue
                track.append(mido.Message(
                    'note_off', note=note, channel=channel,
                    time=time_in_ticks-previous_time))
            track_state[instrument] = (track, time_in_ticks, channel)

    return mid
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def compound_to_events(tokens, stats=False):
    """Convert compound words (time, dur, note, instr, velocity) into the
    3-token event vocabulary (time, dur, note), applying type offsets.

    Velocities are dropped; pitch/instrument pairs are fused into a single
    note code; durations are clipped to MAX_DUR and unknown durations (-1)
    become 250ms.  When stats is True, also return the clip count.

    NOTE(review): this module's midi_to_compound packs (program << 4) |
    channel into the instr slot; such values can exceed 129 and would trip
    the instrument-range assertion below — confirm which producer feeds
    this function.
    """
    assert len(tokens) % 5 == 0
    compound = tokens.copy()

    # remove velocities
    del compound[4::5]

    # fuse (pitch, instrument) into a single note code
    assert all(-1 <= tok < 2**7 for tok in compound[2::4])
    assert all(-1 <= tok < 129 for tok in compound[3::4])
    compound[2::4] = [
        NOTE_OFFSET + (SEPARATOR if pitch == -1 else MAX_PITCH*instr + pitch)
        for pitch, instr in zip(compound[2::4], compound[3::4])
    ]
    del compound[3::4]

    # max duration cutoff and set unknown durations to 250ms
    truncations = sum(1 for tok in compound[1::3] if tok >= MAX_DUR)
    compound[1::3] = [
        DUR_OFFSET + (TIME_RESOLUTION//4 if tok == -1 else min(tok, MAX_DUR-1))
        for tok in compound[1::3]
    ]

    assert min(compound[0::3]) >= 0
    compound[0::3] = [TIME_OFFSET + tok for tok in compound[0::3]]

    assert len(compound) % 3 == 0

    if stats:
        return compound, truncations

    return compound
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def events_to_compound(tokens, debug=False):
    """Convert 3-tuple event tokens (time, duration, note) back into the
    5-tuple compound representation (time, duration, pitch, instrument,
    velocity), concatenating multiple separator-delimited tracks along the
    time axis for synthesis.
    """
    tokens = unpad(tokens)

    # move all tokens to zero-offset for synthesis
    tokens = [tok - CONTROL_OFFSET if tok >= CONTROL_OFFSET and tok != SEPARATOR else tok
              for tok in tokens]

    # remove type offsets
    tokens[0::3] = [tok - TIME_OFFSET if tok != SEPARATOR else tok for tok in tokens[0::3]]
    tokens[1::3] = [tok - DUR_OFFSET if tok != SEPARATOR else tok for tok in tokens[1::3]]
    tokens[2::3] = [tok - NOTE_OFFSET if tok != SEPARATOR else tok for tok in tokens[2::3]]

    offset = 0  # add max time from previous track for synthesis
    track_max = 0  # keep track of max time in track
    for j, (time,dur,note) in enumerate(zip(tokens[0::3],tokens[1::3],tokens[2::3])):
        if note == SEPARATOR:
            # a new track begins: shift all subsequent times past the previous track
            offset += track_max
            track_max = 0
            if debug:
                print('Sequence Boundary')
        else:
            track_max = max(track_max, time+dur)
            tokens[3*j] += offset

    # strip sequence separators
    assert len([tok for tok in tokens if tok == SEPARATOR]) % 3 == 0
    tokens = [tok for tok in tokens if tok != SEPARATOR]

    assert len(tokens) % 3 == 0
    out = 5*(len(tokens)//3)*[0]
    out[0::5] = tokens[0::3]
    out[1::5] = tokens[1::3]
    # split the combined note token back into (pitch, instrument)
    out[2::5] = [tok - (2**7)*(tok//2**7) for tok in tokens[2::3]]
    out[3::5] = [tok//2**7 for tok in tokens[2::3]]
    out[4::5] = (len(tokens)//3)*[72]  # default velocity

    assert max(out[1::5]) < MAX_DUR
    assert max(out[2::5]) < MAX_PITCH
    assert max(out[3::5]) < MAX_INSTR
    assert all(tok >= 0 for tok in out)

    return out
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def events_to_midi(tokens, debug=False):
    """Render an event-tokenized sequence to a mido MidiFile."""
    compound = events_to_compound(tokens, debug=debug)
    return compound_to_midi(compound, debug=debug)
|
| 363 |
+
|
| 364 |
+
def midi_to_events(midifile, debug=False):
    """Tokenize a MIDI file into the 3-tuple event vocabulary."""
    compound = midi_to_compound(midifile, debug=debug)
    return compound_to_events(compound)
|
anticipation/ops.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utilities for operating on encoded Midi sequences.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
|
| 7 |
+
from anticipation.config import *
|
| 8 |
+
from anticipation.vocab import *
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def print_tokens(tokens):
    """Pretty-print a triple-encoded token sequence, one event per line."""
    print('---------------------')
    triples = zip(tokens[0::3], tokens[1::3], tokens[2::3])
    for idx, (tm, dur, note) in enumerate(triples):
        if note == SEPARATOR:
            # separators occupy all three slots of a triple
            assert tm == SEPARATOR and dur == SEPARATOR
            print(idx, 'SEPARATOR')
        elif note == REST:
            # rests carry a real (event-vocab) time and a zero duration
            assert tm < CONTROL_OFFSET
            assert dur == DUR_OFFSET+0
            print(idx, tm, 'REST')
        elif note < CONTROL_OFFSET:
            # ordinary event
            instr, pitch = divmod(note - NOTE_OFFSET, 2**7)
            print(idx, tm - TIME_OFFSET, dur - DUR_OFFSET, instr, pitch)
        else:
            # anticipated control
            instr, pitch = divmod(note - ANOTE_OFFSET, 2**7)
            print(idx, tm - ATIME_OFFSET, dur - ADUR_OFFSET, instr, pitch, '(A)')
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def clip(tokens, start, end, clip_duration=True, seconds=True):
    """Keep only events whose onset lies in [start, end]; optionally truncate
    durations so no kept note rings past `end`."""
    if seconds:
        start = int(TIME_RESOLUTION*start)
        end = int(TIME_RESOLUTION*end)

    result = []
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        anticipated = note >= CONTROL_OFFSET
        onset = time - (ATIME_OFFSET if anticipated else TIME_OFFSET)
        length = dur - (ADUR_OFFSET if anticipated else DUR_OFFSET)

        # drop events that start outside the window
        if not (start <= onset <= end):
            continue

        # truncate extended notes
        if clip_duration and onset + length > end:
            dur -= onset + length - end

        result.extend([time, dur, note])

    return result
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def mask(tokens, start, end):
    """Remove events whose onset (in seconds) falls strictly inside (start, end)."""
    result = []
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        base = ATIME_OFFSET if note >= CONTROL_OFFSET else TIME_OFFSET
        onset_seconds = (time - base)/float(TIME_RESOLUTION)
        if not (start < onset_seconds < end):
            result.extend([time, dur, note])
    return result
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def delete(tokens, criterion):
    """Drop every (time, dur, note) triple for which criterion(triple) is true."""
    kept = []
    for triple in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        if not criterion(triple):
            kept.extend(triple)
    return kept
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def sort(tokens):
    """Stably order a sequence of (time, dur, note) triples by onset time.

    Operates on events or controls, but not an interleaved mix of both.
    """
    triples = list(zip(tokens[0::3], tokens[1::3], tokens[2::3]))
    triples.sort(key=lambda triple: triple[0])  # stable: ties keep input order

    ordered = []
    for triple in triples:
        ordered.extend(triple)

    return ordered
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def split(tokens):
    """Partition an interleaved sequence into (events, controls)."""
    events, controls = [], []
    for triple in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        # the note token decides which stream the triple belongs to
        bucket = events if triple[2] < CONTROL_OFFSET else controls
        bucket.extend(triple)
    return events, controls
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def pad(tokens, end_time=None, density=TIME_RESOLUTION):
    """Insert REST tokens so consecutive onsets are never more than `density`
    ticks apart, padding out to end_time (defaults to the sequence's max time)."""
    # NB: falsy end_time (None or 0) falls back to the sequence length
    if not end_time:
        end_time = max_time(tokens, seconds=False)
    end_time = TIME_OFFSET + end_time

    padded = []
    prev = TIME_OFFSET+0
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        # must pad before separation, anticipation
        assert note < CONTROL_OFFSET

        # insert pad tokens to ensure the desired density
        while time > prev + density:
            prev += density
            padded.extend([prev, DUR_OFFSET+0, REST])

        padded.extend([time, dur, note])
        prev = time

    # continue padding out to the requested end time
    while end_time > prev + density:
        prev += density
        padded.extend([prev, DUR_OFFSET+0, REST])

    return padded
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def unpad(tokens):
    """Strip REST padding triples from a sequence."""
    stripped = []
    for triple in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        if triple[2] != REST:
            stripped.extend(triple)
    return stripped
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def anticipate(events, controls, delta=DELTA*TIME_RESOLUTION):
    """
    Interleave a sequence of events with anticipated controls.

    Inputs:
        events   : a sequence of events
        controls : a sequence of time-localized controls
        delta    : the anticipation interval (ticks)

    Returns:
        tokens   : interleaved events and anticipated controls
        controls : unconsumed controls (control time > max_time(events) + delta)
    """
    if len(controls) == 0:
        return events, controls

    tokens = []
    event_time = 0
    control_time = controls[0] - ATIME_OFFSET
    for time, dur, note in zip(events[0::3],events[1::3],events[2::3]):
        # emit every control whose time falls within delta of the current event time
        while event_time >= control_time - delta:
            tokens.extend(controls[0:3])
            controls = controls[3:] # consume this control
            control_time = controls[0] - ATIME_OFFSET if len(controls) > 0 else float('inf')

        assert note < CONTROL_OFFSET  # events must not already be control-encoded
        event_time = time - TIME_OFFSET
        tokens.extend([time, dur, note])

    return tokens, controls
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def sparsity(tokens):
    """Return the largest gap (ticks) between consecutive onsets in the sequence."""
    largest_gap = 0
    prev = TIME_OFFSET+0
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        if note == SEPARATOR:
            continue
        assert note < CONTROL_OFFSET  # don't operate on interleaved sequences

        gap = time - prev
        if gap > largest_gap:
            largest_gap = gap
        prev = time

    return largest_gap
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def min_time(tokens, seconds=True, instr=None):
    """Earliest onset in the sequence (0 if empty), optionally restricted to
    one instrument; returned in seconds unless seconds=False (ticks)."""
    earliest = None
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        # stop calculating at sequence separator
        if note == SEPARATOR:
            break

        if note < CONTROL_OFFSET:
            time, note = time - TIME_OFFSET, note - NOTE_OFFSET
        else:
            time, note = time - ATIME_OFFSET, note - ANOTE_OFFSET

        # min time of a particular instrument
        if instr is not None and instr != note//2**7:
            continue

        if earliest is None or time < earliest:
            earliest = time

    if earliest is None:
        earliest = 0
    return earliest/float(TIME_RESOLUTION) if seconds else earliest
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def max_time(tokens, seconds=True, instr=None):
    """Latest onset in the sequence (0 if empty), optionally restricted to
    one instrument; returned in seconds unless seconds=False (ticks)."""
    latest = 0
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        # keep checking for max_time, even if it appears after a separator
        # (this is important because we use this check for vocab overflow in tokenization)
        if note == SEPARATOR:
            continue

        if note < CONTROL_OFFSET:
            time, note = time - TIME_OFFSET, note - NOTE_OFFSET
        else:
            time, note = time - ATIME_OFFSET, note - ANOTE_OFFSET

        # max time of a particular instrument
        if instr is not None and instr != note//2**7:
            continue

        if time > latest:
            latest = time

    return latest/float(TIME_RESOLUTION) if seconds else latest
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def get_instruments(tokens):
    """Count note events per instrument; returns a defaultdict {instrument: count}."""
    counts = defaultdict(int)
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        if note >= SPECIAL_OFFSET:
            continue  # special tokens (separator/rest) carry no instrument

        base = NOTE_OFFSET if note < CONTROL_OFFSET else ANOTE_OFFSET
        counts[(note - base)//2**7] += 1

    return counts
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def translate(tokens, dt, seconds=False):
    """Shift every onset forward by dt; translation stops after a SEPARATOR."""
    if seconds:
        dt = int(TIME_RESOLUTION*dt)

    shifted = []
    for time, dur, note in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        if note == SEPARATOR:
            # pass the boundary through untouched and stop translating after EOT
            shifted.extend([time, dur, note])
            dt = 0
            continue

        base = TIME_OFFSET if note < CONTROL_OFFSET else ATIME_OFFSET
        # the shift must not move any event before time zero
        assert time - base + dt >= 0
        shifted.extend([time+dt, dur, note])

    return shifted
|
| 283 |
+
|
| 284 |
+
def combine(events, controls):
    """Merge controls (stripped of their vocab offset) into events, time-sorted."""
    demoted = [tok - CONTROL_OFFSET for tok in controls]
    return sort(events + demoted)
|
anticipation/sample.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API functions for sampling from anticipatory infilling models.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import math
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
|
| 10 |
+
from tqdm import tqdm
|
| 11 |
+
|
| 12 |
+
from anticipation import ops
|
| 13 |
+
from anticipation.config import *
|
| 14 |
+
from anticipation.vocab import *
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def safe_logits(logits, idx):
    """Mask vocabulary regions that must never be sampled at sequence
    position idx (position within the interleaved triple stream)."""
    logits[CONTROL_OFFSET:SPECIAL_OFFSET] = -float('inf') # don't generate controls
    logits[SPECIAL_OFFSET:] = -float('inf') # don't generate special tokens

    # don't generate stuff in the wrong time slot
    if idx % 3 == 0:
        # time slot: mask durations and notes
        logits[DUR_OFFSET:DUR_OFFSET+MAX_DUR] = -float('inf')
        logits[NOTE_OFFSET:NOTE_OFFSET+MAX_NOTE] = -float('inf')
    elif idx % 3 == 1:
        # duration slot: mask times and notes
        logits[TIME_OFFSET:TIME_OFFSET+MAX_TIME] = -float('inf')
        logits[NOTE_OFFSET:NOTE_OFFSET+MAX_NOTE] = -float('inf')
    elif idx % 3 == 2:
        # note slot: mask times and durations
        logits[TIME_OFFSET:TIME_OFFSET+MAX_TIME] = -float('inf')
        logits[DUR_OFFSET:DUR_OFFSET+MAX_DUR] = -float('inf')

    return logits
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def nucleus(logits, top_p):
    """Nucleus (top-p) filtering: mask all but the smallest set of tokens
    whose cumulative probability exceeds top_p. Adapted from the
    Hugging Face transformers sampling utilities; operates on a 1-D logits
    vector in place and returns it."""
    # from HF implementation
    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold (token with 0 are kept)
        sorted_indices_to_remove = cumulative_probs > top_p

        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        # scatter sorted tensors to original indexing
        indices_to_remove = sorted_indices_to_remove.scatter(0, sorted_indices, sorted_indices_to_remove)
        logits[indices_to_remove] = -float("inf")

    return logits
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def future_logits(logits, curtime):
    """ don't sample events in the past """
    if curtime > 0:
        # mask all time tokens strictly earlier than curtime (ticks)
        logits[TIME_OFFSET:TIME_OFFSET+curtime] = -float('inf')

    return logits
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def instr_logits(logits, full_history):
    """ don't sample more than 16 instruments """
    instrs = ops.get_instruments(full_history)
    if len(instrs) < 15: # 16 - 1 to account for the reserved drum track
        return logits

    # at the channel limit: only allow instruments already in use
    for instr in range(MAX_INSTR):
        if instr not in instrs:
            logits[NOTE_OFFSET+instr*MAX_PITCH:NOTE_OFFSET+(instr+1)*MAX_PITCH] = -float('inf')

    return logits
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def add_token(model, z, tokens, top_p, current_time, debug=False):
    """Sample one (time, duration, note) triple from the model.

    z            : control-code prefix ([ANTICIPATE] or [AUTOREGRESS])
    tokens       : the full generated history so far (triples)
    top_p        : nucleus sampling threshold
    current_time : earliest onset (ticks) allowed for the new event

    Returns the new triple with its time in absolute sequence coordinates.
    """
    assert len(tokens) % 3 == 0

    history = tokens.copy()
    # 1017: presumably leaves room for the control code and the 3 new tokens
    # inside the model's context window — TODO confirm against model config
    lookback = max(len(tokens) - 1017, 0)
    history = history[lookback:] # Markov window
    offset = ops.min_time(history, seconds=False)
    history[::3] = [tok - offset for tok in history[::3]] # relativize time in the history buffer

    new_token = []
    with torch.no_grad():
        # sample the time, duration, and note slots one at a time
        for i in range(3):
            input_tokens = torch.tensor(z + history + new_token).unsqueeze(0).to(model.device)
            logits = model(input_tokens).logits[0,-1]

            idx = input_tokens.shape[1]-1
            logits = safe_logits(logits, idx)
            if i == 0:
                # time slot: forbid onsets in the (relativized) past
                logits = future_logits(logits, current_time - offset)
            elif i == 2:
                # note slot: respect the 16-channel instrument limit
                logits = instr_logits(logits, tokens)
            logits = nucleus(logits, top_p)

            probs = F.softmax(logits, dim=-1)
            token = torch.multinomial(probs, 1)
            new_token.append(int(token))

    new_token[0] += offset # revert to full sequence timing
    if debug:
        print(f' OFFSET = {offset}, LEN = {len(history)}, TIME = {tokens[::3][-5:]}')

    return new_token
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def generate(model, start_time, end_time, inputs=None, controls=None, top_p=1.0, debug=False, delta=DELTA*TIME_RESOLUTION):
    """Generate events in [start_time, end_time) seconds with anticipation.

    inputs   : existing events; those past start_time are replayed as
               anticipated controls and merged back into the output
    controls : anticipated control triples (control-vocab encoded)
    delta    : anticipation interval in ticks

    Returns the full sorted event sequence (generated + given future events).
    """
    if inputs is None:
        inputs = []

    if controls is None:
        controls = []

    start_time = int(TIME_RESOLUTION*start_time)
    end_time = int(TIME_RESOLUTION*end_time)

    # prompt is events up to start_time
    prompt = ops.pad(ops.clip(inputs, 0, start_time, clip_duration=False, seconds=False), start_time)

    # treat events beyond start_time as controls
    future = ops.clip(inputs, start_time+1, ops.max_time(inputs, seconds=False), clip_duration=False, seconds=False)
    if debug:
        print('Future')
        ops.print_tokens(future)

    # clip controls that precede the sequence
    # NOTE(review): DELTA is a seconds-valued constant but seconds=False here —
    # verify the intended unit (elsewhere DELTA*TIME_RESOLUTION is used for ticks)
    controls = ops.clip(controls, DELTA, ops.max_time(controls, seconds=False), clip_duration=False, seconds=False)

    if debug:
        print('Controls')
        ops.print_tokens(controls)

    # anticipatory mode is only needed when there is something to anticipate
    z = [ANTICIPATE] if len(controls) > 0 or len(future) > 0 else [AUTOREGRESS]
    if debug:
        print('AR Mode' if z[0] == AUTOREGRESS else 'AAR Mode')

    # interleave the controls with the events
    tokens, controls = ops.anticipate(prompt, ops.sort(controls + [CONTROL_OFFSET+token for token in future]))

    if debug:
        print('Prompt')
        ops.print_tokens(tokens)

    current_time = ops.max_time(prompt, seconds=False)
    if debug:
        print('Current time:', current_time)

    with tqdm(range(end_time-start_time)) as progress:
        if controls:
            atime, adur, anote = controls[0:3]
            anticipated_tokens = controls[3:]
            anticipated_time = atime - ATIME_OFFSET
        else:
            # nothing to anticipate
            anticipated_time = math.inf

        while True:
            # emit controls that fall within delta of the current time
            while current_time >= anticipated_time - delta:
                tokens.extend([atime, adur, anote])
                if debug:
                    note = anote - ANOTE_OFFSET
                    instr = note//2**7
                    print('A', atime - ATIME_OFFSET, adur - ADUR_OFFSET, instr, note - (2**7)*instr)

                if len(anticipated_tokens) > 0:
                    atime, adur, anote = anticipated_tokens[0:3]
                    anticipated_tokens = anticipated_tokens[3:]
                    anticipated_time = atime - ATIME_OFFSET
                else:
                    # nothing more to anticipate
                    anticipated_time = math.inf

            new_token = add_token(model, z, tokens, top_p, max(start_time,current_time))
            new_time = new_token[0] - TIME_OFFSET
            if new_time >= end_time:
                break

            if debug:
                new_note = new_token[2] - NOTE_OFFSET
                new_instr = new_note//2**7
                new_pitch = new_note - (2**7)*new_instr
                print('C', new_time, new_token[1] - DUR_OFFSET, new_instr, new_pitch)

            tokens.extend(new_token)
            dt = new_time - current_time
            assert dt >= 0
            current_time = new_time
            progress.update(dt)

    # discard the anticipated copies; re-merge the original future events
    events, _ = ops.split(tokens)
    return ops.sort(ops.unpad(events) + future)
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def generate_ar(model, start_time, end_time, inputs=None, controls=None, top_p=1.0, debug=False, delta=DELTA*TIME_RESOLUTION):
    """Purely autoregressive generation in [start_time, end_time) seconds.

    Controls are folded into the ordinary event stream (no anticipation);
    given events beyond start_time are backfilled into the context as the
    model's generated time passes them.
    """
    if inputs is None:
        inputs = []

    if controls is None:
        controls = []
    else:
        # treat controls as ordinary tokens
        controls = [token-CONTROL_OFFSET for token in controls]

    start_time = int(TIME_RESOLUTION*start_time)
    end_time = int(TIME_RESOLUTION*end_time)

    inputs = ops.sort(inputs + controls)

    # prompt is events up to start_time
    prompt = ops.pad(ops.clip(inputs, 0, start_time, clip_duration=False, seconds=False), start_time)
    if debug:
        print('Prompt')
        ops.print_tokens(prompt)

    # treat events beyond start_time as controls
    controls = ops.clip(inputs, start_time+1, ops.max_time(inputs, seconds=False), clip_duration=False, seconds=False)
    if debug:
        print('Future')
        ops.print_tokens(controls)

    z = [AUTOREGRESS]
    if debug:
        print('AR Mode')

    current_time = ops.max_time(prompt, seconds=False)
    if debug:
        print('Current time:', current_time)

    tokens = prompt
    with tqdm(range(end_time-start_time)) as progress:
        if controls:
            atime, adur, anote = controls[0:3]
            anticipated_tokens = controls[3:]
            anticipated_time = atime - TIME_OFFSET
        else:
            # nothing to anticipate
            anticipated_time = math.inf

        while True:
            new_token = add_token(model, z, tokens, top_p, max(start_time,current_time))
            new_time = new_token[0] - TIME_OFFSET
            if new_time >= end_time:
                break

            dt = new_time - current_time
            assert dt >= 0
            current_time = new_time

            # backfill anything that should have come before the new token
            while current_time >= anticipated_time:
                tokens.extend([atime, adur, anote])
                if debug:
                    note = anote - NOTE_OFFSET
                    instr = note//2**7
                    print('A', atime - TIME_OFFSET, adur - DUR_OFFSET, instr, note - (2**7)*instr)

                if len(anticipated_tokens) > 0:
                    atime, adur, anote = anticipated_tokens[0:3]
                    anticipated_tokens = anticipated_tokens[3:]
                    anticipated_time = atime - TIME_OFFSET
                else:
                    # nothing more to anticipate
                    anticipated_time = math.inf

            if debug:
                new_note = new_token[2] - NOTE_OFFSET
                new_instr = new_note//2**7
                new_pitch = new_note - (2**7)*new_instr
                print('C', new_time, new_token[1] - DUR_OFFSET, new_instr, new_pitch)

            tokens.extend(new_token)
            progress.update(dt)

    if anticipated_time != math.inf:
        tokens.extend([atime, adur, anote])

    # NOTE(review): controls backfilled into `tokens` above are also still
    # present in `controls` here, so the final sort may contain duplicate
    # triples — verify whether downstream de-duplicates or this is intended
    return ops.sort(ops.unpad(tokens) + controls)
|
anticipation/tokenize.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Top-level functions for preprocessing data to be used for training.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
from anticipation import ops
|
| 10 |
+
from anticipation.config import *
|
| 11 |
+
from anticipation.vocab import *
|
| 12 |
+
from anticipation.convert import compound_to_events, midi_to_interarrival
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def extract_spans(all_events, rate):
    """Partition events into (events, controls) by anticipating random
    contiguous time spans: span starts arrive with exponentially distributed
    gaps at the given rate (per second), and each span lasts DELTA seconds.
    Events inside a span are re-encoded as controls."""
    events = []
    controls = []
    span = True
    next_span = end_span = TIME_OFFSET+0
    for time, dur, note in zip(all_events[0::3],all_events[1::3],all_events[2::3]):
        assert(note not in [SEPARATOR, REST]) # shouldn't be in the sequence yet

        # end of an anticipated span; decide when to do it again (next_span)
        if span and time >= end_span:
            span = False
            next_span = time+int(TIME_RESOLUTION*np.random.exponential(1./rate))

        # anticipate a 3-second span
        if (not span) and time >= next_span:
            span = True
            end_span = time + DELTA*TIME_RESOLUTION

        if span:
            # mark this event as a control
            controls.extend([CONTROL_OFFSET+time, CONTROL_OFFSET+dur, CONTROL_OFFSET+note])
        else:
            events.extend([time, dur, note])

    return events, controls
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
ANTICIPATION_RATES = 10
|
| 43 |
+
def extract_random(all_events, rate):
    """Randomly promote events to controls, each independently with
    probability rate/ANTICIPATION_RATES."""
    events = []
    controls = []
    threshold = rate/float(ANTICIPATION_RATES)
    for time, dur, note in zip(all_events[0::3],all_events[1::3],all_events[2::3]):
        assert(note not in [SEPARATOR, REST]) # shouldn't be in the sequence yet

        if np.random.random() < threshold:
            # mark this event as a control
            controls.extend([CONTROL_OFFSET+time, CONTROL_OFFSET+dur, CONTROL_OFFSET+note])
        else:
            events.extend([time, dur, note])

    return events, controls
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def extract_instruments(all_events, instruments):
    """Split events so that notes played by any instrument in `instruments`
    become controls; everything else stays an event."""
    events = []
    controls = []
    for time, dur, note in zip(all_events[0::3],all_events[1::3],all_events[2::3]):
        assert note < CONTROL_OFFSET # shouldn't be in the sequence yet
        assert note not in [SEPARATOR, REST] # these shouldn't either

        if (note-NOTE_OFFSET)//2**7 in instruments:
            # mark this event as a control
            controls.extend([CONTROL_OFFSET+time, CONTROL_OFFSET+dur, CONTROL_OFFSET+note])
        else:
            events.extend([time, dur, note])

    return events, controls
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def maybe_tokenize(compound_tokens):
    """Tokenize a compound stream into events, filtering out unusable tracks.

    Returns (events, truncations, status) where status is:
      0 - ok; 1 - track too short; 2 - track too long; 3 - too many instruments.
    events and truncations are None whenever status != 0.
    """
    # skip sequences with very few events
    if len(compound_tokens) < COMPOUND_SIZE*MIN_TRACK_EVENTS:
        return None, None, 1 # short track

    events, truncations = compound_to_events(compound_tokens, stats=True)
    end_time = ops.max_time(events, seconds=False)

    # don't want to deal with extremely short tracks
    if end_time < TIME_RESOLUTION*MIN_TRACK_TIME_IN_SECONDS:
        return None, None, 1 # short track

    # don't want to deal with extremely long tracks
    if end_time > TIME_RESOLUTION*MAX_TRACK_TIME_IN_SECONDS:
        return None, None, 2 # long track

    # skip sequences with more instruments than MIDI channels (16)
    if len(ops.get_instruments(events)) > MAX_TRACK_INSTR:
        return None, None, 3 # too many instruments

    return events, truncations, 0
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def tokenize_ia(datafiles, output, augment_factor, idx=0, debug=False):
    """Tokenize tracks into interarrival-time (MIDI-like) sequences.

    Reads each '.compound.txt' file in *datafiles*, filters tracks with
    maybe_tokenize, re-encodes the surviving original MIDI files with
    midi_to_interarrival, and writes fixed-length sequences of
    CONTEXT_SIZE tokens to *output*, one whitespace-separated sequence
    per line.

    Args:
        datafiles: paths to '.compound.txt' token files.
        output: path of the output text file.
        augment_factor: must be 1 (interarrival data can't be augmented).
        idx: worker index, used only to position the tqdm progress bar.
        debug: if True, print a summary after processing.

    Returns:
        (seqcount, rest_count, n_short, n_long, n_many_instr,
         n_inexpressible, all_truncations); rest_count is always 0 here.
    """
    assert augment_factor == 1 # can't augment interarrival-tokenized data

    all_truncations = 0
    seqcount = rest_count = 0
    stats = 4*[0] # (short, long, too many instruments, inexpressible)
    np.random.seed(0)

    with open(output, 'w') as outfile:
        # leftover tokens carry over between files so sequences can span
        # track boundaries (tracks are delimited by MIDI_SEPARATOR)
        concatenated_tokens = []
        for j, filename in tqdm(list(enumerate(datafiles)), desc=f'#{idx}', position=idx+1, leave=True):
            with open(filename, 'r') as f:
                # run the arrival-time filter only for its status code
                _, _, status = maybe_tokenize([int(token) for token in f.read().split()])

            if status > 0:
                stats[status-1] += 1
                continue

            filename = filename[:-len('.compound.txt')] # get the original MIDI

            # already parsed; shouldn't raise an exception
            tokens, truncations = midi_to_interarrival(filename, stats=True)
            tokens[0:0] = [MIDI_SEPARATOR]
            concatenated_tokens.extend(tokens)
            all_truncations += truncations

            # write out full sequences to file
            while len(concatenated_tokens) >= CONTEXT_SIZE:
                seq = concatenated_tokens[0:CONTEXT_SIZE]
                concatenated_tokens = concatenated_tokens[CONTEXT_SIZE:]
                outfile.write(' '.join([str(tok) for tok in seq]) + '\n')
                seqcount += 1

    if debug:
        fmt = 'Processed {} sequences (discarded {} tracks, discarded {} seqs, added {} rest tokens)'
        print(fmt.format(seqcount, stats[0]+stats[1]+stats[2], stats[3], rest_count))

    return (seqcount, rest_count, stats[0], stats[1], stats[2], stats[3], all_truncations)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def tokenize(datafiles, output, augment_factor, idx=0, debug=False):
    """Tokenize compound-token tracks into arrival-time training sequences.

    Each surviving track is emitted *augment_factor* times with a cycling
    augmentation scheme (k % 10): 0 = none, 1 = span, 2-5 = random-rate,
    6-9 = instrument subset. Events and their anticipated controls are
    interleaved by ops.anticipate, concatenated across tracks, and
    written to *output* as fixed-length lines of EVENT_SIZE*M tokens,
    each prefixed with a global control token (ANTICIPATE/AUTOREGRESS).

    Args:
        datafiles: paths to '.compound.txt' token files.
        output: path of the output text file (one sequence per line).
        augment_factor: number of augmented copies generated per track.
        idx: worker index, used only to position the tqdm progress bar.
        debug: if True, print a summary after processing.

    Returns:
        (seqcount, rest_count, n_short, n_long, n_many_instr,
         n_inexpressible, all_truncations)
    """
    tokens = []
    all_truncations = 0
    seqcount = rest_count = 0
    stats = 4*[0] # (short, long, too many instruments, inexpressible)
    np.random.seed(0)

    with open(output, 'w') as outfile:
        # leftover tokens carry over between tracks/augmentations so
        # sequences can span track boundaries
        concatenated_tokens = []
        for j, filename in tqdm(list(enumerate(datafiles)), desc=f'#{idx}', position=idx+1, leave=True):
            with open(filename, 'r') as f:
                all_events, truncations, status = maybe_tokenize([int(token) for token in f.read().split()])

            if status > 0:
                stats[status-1] += 1
                continue

            instruments = list(ops.get_instruments(all_events).keys())
            end_time = ops.max_time(all_events, seconds=False)

            # different random augmentations
            for k in range(augment_factor):
                if k % 10 == 0:
                    # no augmentation
                    events = all_events.copy()
                    controls = []
                elif k % 10 == 1:
                    # span augmentation
                    lmbda = .05
                    events, controls = extract_spans(all_events, lmbda)
                elif k % 10 < 6:
                    # random augmentation
                    r = np.random.randint(1,ANTICIPATION_RATES)
                    events, controls = extract_random(all_events, r)
                else:
                    if len(instruments) > 1:
                        # instrument augmentation: at least one, but not all instruments
                        u = 1+np.random.randint(len(instruments)-1)
                        subset = np.random.choice(instruments, u, replace=False)
                        events, controls = extract_instruments(all_events, subset)
                    else:
                        # no augmentation
                        events = all_events.copy()
                        controls = []

                # only pick the global control flag now if no partial
                # sequence is pending; otherwise z still describes the
                # augmentation that opened the pending sequence
                if len(concatenated_tokens) == 0:
                    z = ANTICIPATE if k % 10 != 0 else AUTOREGRESS

                all_truncations += truncations
                events = ops.pad(events, end_time)
                rest_count += sum(1 if tok == REST else 0 for tok in events[2::3])
                tokens, controls = ops.anticipate(events, controls)
                assert len(controls) == 0 # should have consumed all controls (because of padding)
                tokens[0:0] = [SEPARATOR, SEPARATOR, SEPARATOR]
                concatenated_tokens.extend(tokens)

                # write out full sequences to file
                while len(concatenated_tokens) >= EVENT_SIZE*M:
                    seq = concatenated_tokens[0:EVENT_SIZE*M]
                    concatenated_tokens = concatenated_tokens[EVENT_SIZE*M:]

                    # relativize time to the context
                    seq = ops.translate(seq, -ops.min_time(seq, seconds=False), seconds=False)
                    assert ops.min_time(seq, seconds=False) == 0
                    # drop sequences whose relativized times overflow the
                    # time vocabulary ("inexpressible")
                    if ops.max_time(seq, seconds=False) >= MAX_TIME:
                        stats[3] += 1
                        continue

                    # if seq contains SEPARATOR, global controls describe the first sequence
                    seq.insert(0, z)

                    outfile.write(' '.join([str(tok) for tok in seq]) + '\n')
                    seqcount += 1

                    # grab the current augmentation controls if we didn't already
                    z = ANTICIPATE if k % 10 != 0 else AUTOREGRESS

    if debug:
        fmt = 'Processed {} sequences (discarded {} tracks, discarded {} seqs, added {} rest tokens)'
        print(fmt.format(seqcount, stats[0]+stats[1]+stats[2], stats[3], rest_count))

    return (seqcount, rest_count, stats[0], stats[1], stats[2], stats[3], all_truncations)
|
anticipation/visuals.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utilities for inspecting encoded music data.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
import matplotlib
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
|
| 10 |
+
import anticipation.ops as ops
|
| 11 |
+
from anticipation.config import *
|
| 12 |
+
from anticipation.vocab import *
|
| 13 |
+
|
| 14 |
+
def visualize(tokens, output, selected=None):
    """Render an arrival-time token sequence as a piano-roll image.

    Builds a (time x pitch) grid where each cell holds a per-instrument
    color index, then saves it with matplotlib.

    Args:
        tokens: flat [time, dur, note, ...] arrival-time token list.
        output: path handed to plt.savefig.
        selected: optional collection of instrument numbers; when given,
            only those instruments are drawn.

    Notes:
        SEPARATOR triples are reported to stdout and skipped; REST tokens
        and drums (instrument 128) are not drawn.
    """
    #colors = ['white', 'silver', 'red', 'sienna', 'darkorange', 'gold', 'yellow', 'palegreen', 'seagreen', 'cyan',
    #          'dodgerblue', 'slategray', 'navy', 'mediumpurple', 'mediumorchid', 'magenta', 'lightpink']
    colors = ['white', '#426aa0', '#b26789', '#de9283', '#eac29f', 'silver', 'red', 'sienna', 'darkorange', 'gold', 'yellow', 'palegreen', 'seagreen', 'cyan', 'dodgerblue', 'slategray', 'navy']

    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['savefig.dpi'] = 300

    max_time = ops.max_time(tokens, seconds=False)
    # grid value 0 = background; value i+1 = instruments[i]
    grid = np.zeros([max_time, MAX_PITCH])
    instruments = list(sorted(list(ops.get_instruments(tokens).keys())))
    if 128 in instruments:
        instruments.remove(128)

    for j, (tm, dur, note) in enumerate(zip(tokens[0::3],tokens[1::3],tokens[2::3])):
        if note == SEPARATOR:
            assert tm == SEPARATOR and dur == SEPARATOR
            print(j, 'SEPARATOR')
            continue

        if note == REST:
            continue

        # controls are not expected here
        assert note < CONTROL_OFFSET

        # strip vocabulary offsets to recover raw time/duration/note
        tm = tm - TIME_OFFSET
        dur = dur - DUR_OFFSET
        note = note - NOTE_OFFSET
        instr = note//2**7
        pitch = note - (2**7)*instr

        if instr == 128: # drums
            continue # we don't visualize this

        if selected and instr not in selected:
            continue

        grid[tm:tm+dur, pitch] = 1+instruments.index(instr)

    plt.clf()
    plt.axis('off')
    cmap = matplotlib.colors.ListedColormap(colors)
    bounds = list(range(MAX_TRACK_INSTR)) + [16]
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
    # flip vertically so high pitches appear at the top
    plt.imshow(np.flipud(grid.T), aspect=16, cmap=cmap, norm=norm, interpolation='none')

    # legend maps each color back to its MIDI instrument number
    patches = [matplotlib.patches.Patch(color=colors[i+1], label=f"{instruments[i]}")
               for i in range(len(instruments))]
    plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0. )

    plt.tight_layout()
    plt.savefig(output)
|
anticipation/vocab.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
The vocabularies used for arrival-time and interarrival-time encodings.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# training sequence vocab
|
| 6 |
+
|
| 7 |
+
from anticipation.config import *
|
| 8 |
+
|
| 9 |
+
# the event block
|
| 10 |
+
EVENT_OFFSET = 0
|
| 11 |
+
TIME_OFFSET = EVENT_OFFSET
|
| 12 |
+
DUR_OFFSET = TIME_OFFSET + MAX_TIME
|
| 13 |
+
NOTE_OFFSET = DUR_OFFSET + MAX_DUR
|
| 14 |
+
REST = NOTE_OFFSET + MAX_NOTE
|
| 15 |
+
|
| 16 |
+
# the control block
|
| 17 |
+
CONTROL_OFFSET = NOTE_OFFSET + MAX_NOTE + 1
|
| 18 |
+
ATIME_OFFSET = CONTROL_OFFSET + 0
|
| 19 |
+
ADUR_OFFSET = ATIME_OFFSET + MAX_TIME
|
| 20 |
+
ANOTE_OFFSET = ADUR_OFFSET + MAX_DUR
|
| 21 |
+
|
| 22 |
+
# the special block
|
| 23 |
+
SPECIAL_OFFSET = ANOTE_OFFSET + MAX_NOTE
|
| 24 |
+
SEPARATOR = SPECIAL_OFFSET
|
| 25 |
+
AUTOREGRESS = SPECIAL_OFFSET + 1
|
| 26 |
+
ANTICIPATE = SPECIAL_OFFSET + 2
|
| 27 |
+
VOCAB_SIZE = ANTICIPATE+1
|
| 28 |
+
|
| 29 |
+
# interarrival-time (MIDI-like) vocab
|
| 30 |
+
MIDI_TIME_OFFSET = 0
|
| 31 |
+
MIDI_START_OFFSET = MIDI_TIME_OFFSET + MAX_INTERARRIVAL
|
| 32 |
+
MIDI_END_OFFSET = MIDI_START_OFFSET + MAX_NOTE
|
| 33 |
+
MIDI_SEPARATOR = MIDI_END_OFFSET + MAX_NOTE
|
| 34 |
+
MIDI_VOCAB_SIZE = MIDI_SEPARATOR + 1
|
| 35 |
+
|
| 36 |
+
if __name__ == '__main__':
|
| 37 |
+
print('Arrival-Time Training Sequence Format:')
|
| 38 |
+
print('Event Offset: ', EVENT_OFFSET)
|
| 39 |
+
print(' -> time offset :', TIME_OFFSET)
|
| 40 |
+
print(' -> duration offset :', DUR_OFFSET)
|
| 41 |
+
print(' -> note offset :', NOTE_OFFSET)
|
| 42 |
+
print(' -> rest token: ', REST)
|
| 43 |
+
print('Anticipated Control Offset: ', CONTROL_OFFSET)
|
| 44 |
+
print(' -> anticipated time offset :', ATIME_OFFSET)
|
| 45 |
+
print(' -> anticipated duration offset :', ADUR_OFFSET)
|
| 46 |
+
print(' -> anticipated note offset :', ANOTE_OFFSET)
|
| 47 |
+
print('Special Token Offset: ', SPECIAL_OFFSET)
|
| 48 |
+
print(' -> separator token: ', SEPARATOR)
|
| 49 |
+
print(' -> autoregression flag: ', AUTOREGRESS)
|
| 50 |
+
print(' -> anticipation flag: ', ANTICIPATE)
|
| 51 |
+
print('Arrival Encoding Vocabulary Size: ', VOCAB_SIZE)
|
| 52 |
+
print('')
|
| 53 |
+
print('Interarrival-Time Training Sequence Format:')
|
| 54 |
+
print(' -> time offset: ', MIDI_TIME_OFFSET)
|
| 55 |
+
print(' -> note-on offset: ', MIDI_START_OFFSET)
|
| 56 |
+
print(' -> note-off offset: ', MIDI_END_OFFSET)
|
| 57 |
+
print(' -> separator token: ', MIDI_SEPARATOR)
|
| 58 |
+
print('Interarrival Encoding Vocabulary Size: ', MIDI_VOCAB_SIZE)
|
api.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents.agents import harmonizer, infiller, change_melody
|
| 2 |
+
from flask import Flask, request, jsonify
|
| 3 |
+
from flask_cors import CORS
|
| 4 |
+
import mido
|
| 5 |
+
import tempfile
|
| 6 |
+
import os
|
| 7 |
+
import music21
|
| 8 |
+
import traceback
|
| 9 |
+
from uuid import uuid4
|
| 10 |
+
import threading
|
| 11 |
+
from transformers import AutoModelForCausalLM
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Flask application object; flask-cors installs its default (permissive)
# CORS handling — an after_request hook elsewhere in this module also
# sets Access-Control-* headers explicitly on every response.
app = Flask(__name__)
CORS(app)
|
| 16 |
+
|
| 17 |
+
@app.after_request
def add_cors_headers(response):
    """Restrict cross-origin access to the production frontend.

    Runs on every response, overwriting the Access-Control-* headers so
    that only the deployed site may call this API from a browser.
    """
    # BUG FIX: the origin was 'https://https://inscoreai.netlify.app/.com'
    # (doubled scheme plus a stray '.com'), which is not a valid origin
    # and therefore matched no browser origin, breaking CORS entirely.
    # An Allow-Origin value must be a bare scheme://host origin, no path.
    response.headers['Access-Control-Allow-Origin'] = 'https://inscoreai.netlify.app'
    response.headers['Access-Control-Allow-Methods'] = 'GET, POST'
    response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
    return response
|
| 24 |
+
|
| 25 |
+
def midi_to_musicxml(midi_file_path):
    """Convert a MIDI file to a MusicXML string.

    Parses *midi_file_path* with music21, writes MusicXML to a
    uniquely-named temporary file, reads it back, and returns the XML
    text. Any parsing/IO exception is logged with a traceback and
    re-raised.
    """
    try:
        midi_path_str = str(midi_file_path)

        # Parse and convert to MusicXML
        score = music21.converter.parse(midi_path_str)

        # Unique temp path so concurrent requests don't collide
        temp_output = os.path.join(tempfile.gettempdir(), f"output_{uuid4().hex}.musicxml")
        try:
            # Write to temporary file, then read back as a string
            score.write('musicxml', temp_output)
            with open(temp_output, 'r') as f:
                musicxml_str = f.read()
        finally:
            # FIX: clean up even when write/read raises — previously a
            # failed conversion leaked the temp file in tmpdir.
            if os.path.exists(temp_output):
                os.unlink(temp_output)

        return musicxml_str
    except Exception as e:
        print(f"Conversion error: {str(e)}")
        traceback.print_exc()
        raise
|
| 51 |
+
|
| 52 |
+
def load_model():
    """Return the shared music-generation model, loading it lazily.

    Thread-safe: MODEL_LOCK serializes concurrent first calls so the
    checkpoint is loaded at most once per process.
    """
    global MODEL
    with MODEL_LOCK:
        if MODEL is None:
            print("⏳ Loading music generation model...")
            # local_files_only / force_download=False: never re-download,
            # the checkpoint is expected in the local HF cache
            MODEL = AutoModelForCausalLM.from_pretrained(
                'stanford-crfm/music-small-800k',
                local_files_only=True,
                force_download=False,
            )
            # Add .cuda() here if using GPU
            print("✅ Model loaded successfully!")
    return MODEL
|
| 61 |
+
|
| 62 |
+
# Model loading setup: shared singleton plus a lock guarding first load
MODEL = None
MODEL_LOCK = threading.Lock()

# Initialize model when app starts, so the first request doesn't pay the
# checkpoint-loading latency.
# NOTE(review): load_model() doesn't appear to touch the Flask context,
# so the app_context() wrapper is presumably unnecessary — confirm
# before removing.
with app.app_context():
    load_model()
|
| 69 |
+
|
| 70 |
+
@app.route('/upload', methods=['POST'])
def handle_upload():
    """Harmonize a selected time range of an uploaded MIDI file.

    Form fields:
        midi_file  -- the MIDI upload (required)
        start_time -- selection start in milliseconds (default '0')
        end_time   -- selection end in milliseconds (default '0')
        top_p      -- nucleus-sampling threshold (default '0.95')

    Returns JSON {status, musicxml} on success, or {status, message}
    with HTTP 400 on failure.
    """
    temp_midi_path = None
    try:
        # FIX: parse top_p inside the try block — a malformed value
        # previously raised an unhandled ValueError (HTTP 500) instead
        # of the JSON 400 response every other error gets.
        top_p = float(request.form.get('top_p', '0.95'))

        # Validate input
        if 'midi_file' not in request.files:
            return jsonify({"status": "error", "message": "No MIDI file provided"}), 400

        midi_file = request.files['midi_file']
        start_time = request.form.get('start_time', '0')
        end_time = request.form.get('end_time', '0')

        # Create temporary MIDI file with random name
        temp_dir = tempfile.gettempdir()
        temp_midi_path = os.path.join(temp_dir, f"temp_{uuid4().hex}.mid")

        # Save uploaded MIDI to temp file
        midi_file.save(temp_midi_path)

        # Process MIDI: harmonize the selected window (ms -> seconds)
        midi = mido.MidiFile(temp_midi_path)
        model = load_model()
        harmonized_midi = harmonizer(model, midi, int(start_time)/1000, int(end_time)/1000, top_p=top_p)

        # Save harmonized MIDI (overwriting temp file)
        harmonized_midi.save(temp_midi_path)

        # Convert to MusicXML string
        musicxml_str = midi_to_musicxml(temp_midi_path)

        # Final type verification
        if not isinstance(musicxml_str, str):
            raise TypeError(f"Expected string but got {type(musicxml_str)}")

        return jsonify({
            "status": "success",
            "musicxml": musicxml_str
        })

    except Exception as e:
        print(f"Error processing request: {str(e)}")
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 400
    finally:
        # Clean up temp file
        if temp_midi_path and os.path.exists(temp_midi_path):
            try:
                os.unlink(temp_midi_path)
            except Exception as e:
                print(f"Warning: Could not remove {temp_midi_path}: {str(e)}")
|
| 125 |
+
|
| 126 |
+
@app.route('/uploadinfill', methods=['POST'])
def handle_upload_infilling():
    """Infill (regenerate) a selected time range of an uploaded MIDI file.

    Form fields:
        midi_file  -- the MIDI upload (required)
        start_time -- selection start in milliseconds (default '0')
        end_time   -- selection end in milliseconds (default '0')
        top_p      -- nucleus-sampling threshold (default '0.95')

    Returns JSON {status, musicxml} on success, or {status, message}
    with HTTP 400 on failure.
    """
    temp_midi_path = None
    try:
        # FIX: parse top_p inside the try block — a malformed value
        # previously raised an unhandled ValueError (HTTP 500) instead
        # of the JSON 400 response every other error gets.
        top_p = float(request.form.get('top_p', '0.95'))

        # Validate input
        if 'midi_file' not in request.files:
            return jsonify({"status": "error", "message": "No MIDI file provided"}), 400

        midi_file = request.files['midi_file']
        start_time = request.form.get('start_time', '0')
        end_time = request.form.get('end_time', '0')

        # Create temporary MIDI file with random name
        temp_dir = tempfile.gettempdir()
        temp_midi_path = os.path.join(temp_dir, f"temp_{uuid4().hex}.mid")

        # Save uploaded MIDI to temp file
        midi_file.save(temp_midi_path)

        # Process MIDI: infill the selected window (ms -> seconds)
        midi = mido.MidiFile(temp_midi_path)
        model = load_model()
        infilled_midi = infiller(model, midi, int(start_time)/1000, int(end_time)/1000, top_p=top_p)

        # Save infilled MIDI (overwriting temp file)
        infilled_midi.save(temp_midi_path)

        # Convert to MusicXML string
        musicxml_str = midi_to_musicxml(temp_midi_path)

        # Final type verification
        if not isinstance(musicxml_str, str):
            raise TypeError(f"Expected string but got {type(musicxml_str)}")

        return jsonify({
            "status": "success",
            "musicxml": musicxml_str
        })

    except Exception as e:
        print(f"Error processing request: {str(e)}")
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 400
    finally:
        # Clean up temp file
        if temp_midi_path and os.path.exists(temp_midi_path):
            try:
                os.unlink(temp_midi_path)
            except Exception as e:
                print(f"Warning: Could not remove {temp_midi_path}: {str(e)}")
|
| 181 |
+
|
| 182 |
+
@app.route('/uploadchangemelody', methods=['POST'])
def handle_upload_changemelody():
    """Replace the melody in a selected time range of an uploaded MIDI file.

    Form fields:
        midi_file  -- the MIDI upload (required)
        start_time -- selection start in milliseconds (default '0')
        end_time   -- selection end in milliseconds (default '0')
        top_p      -- nucleus-sampling threshold (default '0.95')

    Returns JSON {status, musicxml} on success, or {status, message}
    with HTTP 400 on failure.
    """
    temp_midi_path = None
    try:
        # FIX: parse top_p inside the try block — a malformed value
        # previously raised an unhandled ValueError (HTTP 500) instead
        # of the JSON 400 response every other error gets.
        top_p = float(request.form.get('top_p', '0.95'))

        # Validate input
        if 'midi_file' not in request.files:
            return jsonify({"status": "error", "message": "No MIDI file provided"}), 400

        midi_file = request.files['midi_file']
        start_time = request.form.get('start_time', '0')
        end_time = request.form.get('end_time', '0')

        # Create temporary MIDI file with random name
        temp_dir = tempfile.gettempdir()
        temp_midi_path = os.path.join(temp_dir, f"temp_{uuid4().hex}.mid")

        # Save uploaded MIDI to temp file
        midi_file.save(temp_midi_path)

        # Process MIDI: regenerate the melody in the window (ms -> seconds)
        midi = mido.MidiFile(temp_midi_path)
        model = load_model()
        changed_melody_midi = change_melody(model, midi, int(start_time)/1000, int(end_time)/1000, top_p=top_p)

        # Save regenerated MIDI (overwriting temp file)
        changed_melody_midi.save(temp_midi_path)

        # Convert to MusicXML string
        musicxml_str = midi_to_musicxml(temp_midi_path)

        # Final type verification
        if not isinstance(musicxml_str, str):
            raise TypeError(f"Expected string but got {type(musicxml_str)}")

        return jsonify({
            "status": "success",
            "musicxml": musicxml_str
        })

    except Exception as e:
        print(f"Error processing request: {str(e)}")
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 400
    finally:
        # Clean up temp file
        if temp_midi_path and os.path.exists(temp_midi_path):
            try:
                os.unlink(temp_midi_path)
            except Exception as e:
                print(f"Warning: Could not remove {temp_midi_path}: {str(e)}")
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
# Development entry point; requirements.txt includes gunicorn, which is
# presumably the production server — this branch is not hit under it.
# NOTE(review): debug=True enables the interactive Werkzeug debugger;
# never expose this mode on a public interface.
if __name__ == '__main__':
    app.run(debug=True, port=5000)
|
examples/full-score3.mid
ADDED
|
Binary file (1.36 kB). View file
|
|
|
examples/strawberry.mid
ADDED
|
Binary file (24.2 kB). View file
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
matplotlib == 3.7.1
|
| 2 |
+
midi2audio == 0.1.1
|
| 3 |
+
mido == 1.2.10
|
| 4 |
+
numpy >= 1.22.4
|
| 5 |
+
torch >= 2.0.1
|
| 6 |
+
transformers == 4.29.2
|
| 7 |
+
tqdm == 4.65.0
|
| 8 |
+
flask==3.1.1
|
| 9 |
+
flask-cors==5.0.1
|
| 10 |
+
music21
|
| 11 |
+
gunicorn
|