Added scaling and capping in midi_to_notes; removed notes_to_midi
The scaling and capping were previously done outside of midi_to_notes.
notes_to_midi was not necessary, since map_to_wav already did its job while being aware of the model's output format.
- audio_methods.py +15 -36
audio_methods.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
_SAMPLING_RATE = 16000 # Parameter to pass continuous signal to a discrete one
|
| 4 |
_INSTRUMENT_NAME = "Acoustic Grand Piano" # MIDI instrument used
|
| 5 |
_SCALING_FACTORS = pd.Series(
|
|
@@ -7,6 +6,9 @@ _SCALING_FACTORS = pd.Series(
|
|
| 7 |
) # Factors used to normalize song maps
|
| 8 |
|
| 9 |
def midi_to_notes(midi_file: str) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
| 10 |
pm = pretty_midi.PrettyMIDI(midi_file)
|
| 11 |
instrument = pm.instruments[0]
|
| 12 |
notes = collections.defaultdict(list)
|
|
@@ -15,45 +17,20 @@ def midi_to_notes(midi_file: str) -> pd.DataFrame:
|
|
| 15 |
sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
|
| 16 |
prev_start = sorted_notes[0].start
|
| 17 |
|
|
|
|
| 18 |
for note in sorted_notes:
|
| 19 |
start = note.start
|
| 20 |
end = note.end
|
| 21 |
notes['pitch'].append(note.pitch)
|
| 22 |
-
notes['start'].append(start)
|
| 23 |
-
notes['end'].append(end)
|
| 24 |
notes['step'].append(start - prev_start)
|
| 25 |
notes['duration'].append(end - start)
|
| 26 |
prev_start = start
|
| 27 |
|
| 28 |
-
return pd.DataFrame({name: np.array(value) for name, value in notes.items()})
|
| 29 |
-
|
| 30 |
-
def notes_to_midi(
|
| 31 |
-
notes: pd.DataFrame,
|
| 32 |
-
out_file: str,
|
| 33 |
-
velocity: int = 100, # note loudness
|
| 34 |
-
) -> pretty_midi.PrettyMIDI:
|
| 35 |
-
|
| 36 |
-
pm = pretty_midi.PrettyMIDI()
|
| 37 |
-
instrument = pretty_midi.Instrument(
|
| 38 |
-
program=pretty_midi.instrument_name_to_program(
|
| 39 |
-
_INSTRUMENT_NAME))
|
| 40 |
-
|
| 41 |
-
prev_start = 0
|
| 42 |
-
for i, note in notes.iterrows():
|
| 43 |
-
start = float(prev_start + note['step'])
|
| 44 |
-
end = float(start + note['duration'])
|
| 45 |
-
note = pretty_midi.Note(
|
| 46 |
-
velocity=velocity,
|
| 47 |
-
pitch=int(note['pitch']),
|
| 48 |
-
start=start,
|
| 49 |
-
end=end,
|
| 50 |
-
)
|
| 51 |
-
instrument.notes.append(note)
|
| 52 |
-
prev_start = start
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=120):
|
|
@@ -65,7 +42,9 @@ def display_audio(pm: pretty_midi.PrettyMIDI, seconds=120):
|
|
| 65 |
|
| 66 |
# Define function to convert song map to wav
|
| 67 |
|
| 68 |
-
def map_to_wav(song_map, out_file
|
|
|
|
|
|
|
| 69 |
contracted_map = tf.squeeze(song_map)
|
| 70 |
song_map_T = contracted_map.numpy().T
|
| 71 |
notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
|
|
@@ -74,7 +53,7 @@ def map_to_wav(song_map, out_file, instrument_name, velocity=100):
|
|
| 74 |
pm = pretty_midi.PrettyMIDI()
|
| 75 |
instrument = pretty_midi.Instrument(
|
| 76 |
program=pretty_midi.instrument_name_to_program(
|
| 77 |
-
|
| 78 |
|
| 79 |
prev_start = 0
|
| 80 |
for i, note in notes.iterrows():
|
|
@@ -93,9 +72,9 @@ def map_to_wav(song_map, out_file, instrument_name, velocity=100):
|
|
| 93 |
pm.write(out_file)
|
| 94 |
return pm
|
| 95 |
|
| 96 |
-
def generate_and_display(out_file,
|
| 97 |
song_map = model.generate(z_sample)
|
| 98 |
display.display(imshow(tf.squeeze(song_map)[:,:50]))
|
| 99 |
-
wav = map_to_wav(song_map, out_file,
|
| 100 |
|
| 101 |
return display_audio(wav, seconds)
|
|
|
|
| 1 |
+
_CAP = 3501 # Cap for the number of notes
|
|
|
|
| 2 |
_SAMPLING_RATE = 16000 # Parameter to pass continuous signal to a discrete one
|
| 3 |
_INSTRUMENT_NAME = "Acoustic Grand Piano" # MIDI instrument used
|
| 4 |
_SCALING_FACTORS = pd.Series(
|
|
|
|
| 6 |
) # Factors used to normalize song maps
|
| 7 |
|
| 8 |
def midi_to_notes(midi_file: str) -> pd.DataFrame:
|
| 9 |
+
# Convert midi file to "song map" (dataframe where each note is broken
|
| 10 |
+
# into its components)
|
| 11 |
+
|
| 12 |
pm = pretty_midi.PrettyMIDI(midi_file)
|
| 13 |
instrument = pm.instruments[0]
|
| 14 |
notes = collections.defaultdict(list)
|
|
|
|
| 17 |
sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
|
| 18 |
prev_start = sorted_notes[0].start
|
| 19 |
|
| 20 |
+
# Separate each individual note in pitch, step and duration
|
| 21 |
for note in sorted_notes:
|
| 22 |
start = note.start
|
| 23 |
end = note.end
|
| 24 |
notes['pitch'].append(note.pitch)
|
|
|
|
|
|
|
| 25 |
notes['step'].append(start - prev_start)
|
| 26 |
notes['duration'].append(end - start)
|
| 27 |
prev_start = start
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
+
# Put notes in a dataframe
|
| 31 |
+
notes_df = pd.DataFrame({name: np.array(value) for name, value in notes.items()})
|
| 32 |
+
notes_df = notes_df[:_CAP] # Cap the song to match the model's architecture
|
| 33 |
+
return notes_df / _SCALING_FACTORS # Scale
|
| 34 |
|
| 35 |
|
| 36 |
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=120):
|
|
|
|
| 42 |
|
| 43 |
# Define function to convert song map to wav
|
| 44 |
|
| 45 |
+
def map_to_wav(song_map: pd.DataFrame, out_file: str, velocity: int=100):
|
| 46 |
+
# Convert "song map" to midi file (reverse process with respect to midi_to_notes)
|
| 47 |
+
|
| 48 |
contracted_map = tf.squeeze(song_map)
|
| 49 |
song_map_T = contracted_map.numpy().T
|
| 50 |
notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
|
|
|
|
| 53 |
pm = pretty_midi.PrettyMIDI()
|
| 54 |
instrument = pretty_midi.Instrument(
|
| 55 |
program=pretty_midi.instrument_name_to_program(
|
| 56 |
+
_INSTRUMENT_NAME))
|
| 57 |
|
| 58 |
prev_start = 0
|
| 59 |
for i, note in notes.iterrows():
|
|
|
|
| 72 |
pm.write(out_file)
|
| 73 |
return pm
|
| 74 |
|
| 75 |
+
def generate_and_display(out_file, model, z_sample=None, velocity=100, seconds=120):
|
| 76 |
song_map = model.generate(z_sample)
|
| 77 |
display.display(imshow(tf.squeeze(song_map)[:,:50]))
|
| 78 |
+
wav = map_to_wav(song_map, out_file, velocity)
|
| 79 |
|
| 80 |
return display_audio(wav, seconds)
|