Spaces:
Build error
Build error
| import pickle | |
| import pretty_midi | |
| import gradio as gr | |
| from music21 import * | |
| from midi2audio import FluidSynth | |
| import torch | |
| import torch.nn as nn | |
| from torch.nn import functional as F | |
# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Lookup tables between integer ids and note names, unpickled from ./objects.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so these files must come from a trusted source.
file_path = './objects/int_to_note.pkl'
with open(file_path, mode='rb') as f:
    int_to_note = pickle.load(f)

file_path = './objects/note_to_int.pkl'
with open(file_path, mode='rb') as f:
    note_to_int = pickle.load(f)
class GenerationRNN(nn.Module):
    """Embedding -> GRU -> linear decoder that is fed one token per call.

    The attribute names (``embedding``, ``gru``, ``decoder``, ``n_layers``,
    ``hidden_size``, ...) are kept unchanged on purpose: this file later
    unpickles a saved model object, and pickled instances restore their
    state by attribute name.

    Arguments:
        input_size - number of distinct input tokens (embedding rows)
        hidden_size - width of the embedding and of every GRU layer
        output_size - number of scores produced per step
        n_layers - number of stacked GRU layers
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        # The decoder reads the hidden state of every layer flattened into a
        # single row, hence hidden_size * n_layers input features.
        self.decoder = nn.Linear(hidden_size * n_layers, output_size)

    def forward(self, input, hidden):
        """Advance the network by one token.

        Arguments:
            input - tensor holding a single token id
            hidden - hidden state of shape (n_layers, 1, hidden_size)

        Returns:
            (output, hidden): scores of shape (1, output_size) and the
            updated hidden state.
        """
        # view(1, -1) turns the token into a (seq_len=1, batch) tensor
        embedded = self.embedding(input.view(1, -1))
        # Only the new hidden state is used; the per-step GRU output is not.
        _, hidden = self.gru(embedded, hidden)
        output = self.decoder(hidden.view(1, -1))
        return output, hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape (n_layers, 1, hidden_size).

        The state is created on the device that holds the model weights, so
        it always matches the module regardless of any module-level device
        setting.
        """
        return torch.zeros(
            self.n_layers,
            1,
            self.hidden_size,
            device=self.embedding.weight.device,
        )
def predict_multimomial(net, prime_seq, predict_len, temperature=0.8):
    '''
    Extend a priming sequence by sampling from the network's output scores.

    Arguments:
        net - model exposing init_hidden() and net(token, hidden) -> (scores, hidden)
        prime_seq - non-empty sequence of integer token ids used to warm up
                    the hidden state; it is not modified
        predict_len - number of tokens to sample after the priming sequence
        temperature - positive divisor applied to the scores before sampling;
                      larger values flatten the distribution

    Returns:
        A new list: the priming ids followed by predict_len sampled ids.

    Raises:
        ValueError - if prime_seq is empty or temperature is not positive
    '''
    if len(prime_seq) == 0:
        raise ValueError('prime_seq must contain at least one token')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    hidden = net.init_hidden()
    predicted = list(prime_seq)
    # Keep the tokens on the same device as the hidden state the net produced
    prime = torch.tensor(predicted, dtype=torch.long, device=hidden.device)

    # Sampling needs no gradients; no_grad also stops the autograd graph from
    # growing with every generated step.
    with torch.no_grad():
        # "Building up" the hidden state using all but the last prime token
        for token in prime[:-1]:
            _, hidden = net(token, hidden)

        # The last prime token is the first input of the sampling loop
        current = prime[-1]
        for _ in range(predict_len):
            scores, hidden = net(current, hidden)
            # softmax yields the same distribution as normalising
            # exp(scores / temperature), but does not overflow to inf
            probabilities = torch.softmax(scores.view(-1) / temperature, dim=0)
            current = torch.multinomial(probabilities, 1)
            # Record the sampled id and feed it back as the next input
            predicted.append(current.item())

    return predicted
def create_midi(prediction_output):
    """ build a music21 stream from the predicted pattern strings

    A pattern containing '.' or consisting only of digits is treated as a
    chord whose members are the '.'-separated integers; any other pattern is
    passed to note.Note as a single note. Consecutive patterns are placed
    0.5 offset units apart so that they do not stack. """
    step = 0.5
    output_notes = []

    for index, pattern in enumerate(prediction_output):
        position = index * step

        if ('.' in pattern) or pattern.isdigit():
            # chord: one piano note per integer in the pattern
            members = []
            for pitch_number in pattern.split('.'):
                member = note.Note(int(pitch_number))
                member.storedInstrument = instrument.Piano()
                members.append(member)
            element = chord.Chord(members)
            element.offset = position
        else:
            # single note
            element = note.Note(pattern)
            element.offset = position
            element.storedInstrument = instrument.Piano()

        output_notes.append(element)

    return stream.Stream(output_notes)
def get_note_names(midi):
    """ Extract note and chord names from a parsed score.

    The score is partitioned by instrument and the piano part is used; when
    several parts are piano parts the last one wins, and when none is, the
    first part is used instead. If partitioning yields nothing at all, every
    element of the score is scanned.

    Arguments:
        midi - parsed music21 stream

    Returns:
        A list of strings: str(pitch) for each note, and the chord's normal
        order joined with '.' (e.g. '0.4.7') for each chord. """
    partitioned = instrument.partitionByInstrument(midi)

    if partitioned is None:
        # partitionByInstrument can return None when the stream carries no
        # instrument information; fall back to all elements of the score
        # instead of failing on iteration over None.
        elements = midi.recurse()
    else:
        piano_part = None
        # Filter for only the piano part
        for part in partitioned:
            if isinstance(part.getInstrument(), instrument.Piano):
                piano_part = part
        if not piano_part:  # Some songs somehow have no piano parts
            # Just take the first part
            piano_part = partitioned[0]
        elements = piano_part

    notes_song = []
    for element in elements:
        if isinstance(element, note.Note):
            # The pitch of the single note
            notes_song.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            # The normal order of the chord, a list of integers
            notes_song.append('.'.join(str(n) for n in element.normalOrder))
    return notes_song
def process_input(input_midi_file, input_randomness, input_duration):
    """ Generate a continuation of an uploaded MIDI file and render it to audio.

    Arguments:
        input_midi_file - uploaded file object; its .name is the path parsed
        input_randomness - slider value; divided by 50 to get the sampling temperature
        input_duration - one of 10, 20 or 30; selects how many notes are generated

    Returns:
        ('result.wav', 'result.midi') - paths of the rendered audio and MIDI.

    Raises:
        ValueError - if the file contains no note known to the vocabulary
        KeyError - if input_duration is not 10, 20 or 30 """
    print(input_midi_file.name)
    midi = converter.parse(input_midi_file.name)
    note_names = get_note_names(midi)

    # Notes that are not in the vocabulary have no id. Skip them instead of
    # failing with a KeyError on the first unseen note.
    int_notes = [note_to_int[name] for name in note_names if name in note_to_int]
    if not int_notes:
        raise ValueError('The MIDI file contains no notes known to the model')

    duration_to_size = {30: 100, 20: 66, 10: 33}
    dur = duration_to_size[input_duration]

    generated_seq_multinomial = predict_multimomial(model, int_notes, predict_len = dur, temperature = input_randomness / 50)
    generated_seq_multinomial = [int_to_note[e] for e in generated_seq_multinomial]
    pred_midi_multinomial = create_midi(generated_seq_multinomial)

    # NOTE(review): the output names are fixed, so every call overwrites the
    # files written by the previous one.
    pred_midi_multinomial.write('midi', fp='result.midi')
    sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
    FluidSynth(sound_font).midi_to_audio('result.midi', 'result.wav')
    return 'result.wav', 'result.midi'
# Load the trained generator.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so the model file must come from a trusted source.
file_path = './objects/model_cpu.pkl'
with open(file_path, 'rb') as f:
    model = pickle.load(f)
# The hidden state and the input tokens are created on `device`, so the model
# weights have to be there too; otherwise generation fails with a device
# mismatch on CUDA hosts. Assumes the pickled object is an nn.Module.
model = model.to(device)
# Inference only: switch off training-time behaviour.
model.eval()
# Label text for the MIDI upload widget.
midi_file_desc = "\nAudio file in .midi format\n"

# Long description handed to the interface as its article text.
article = (
    "\n"
    "This model allows you to generate music based on audio input. Please upload a MIDI file below, choose music randomness and duration. The project has been created by the students of Ukrainian Catholic University for our ML course.\n"
    "We are using a GRU model to output new notes based on the given input. You can find more information at our Git repo: https://github.com/DmytroLopushanskyy/music-generation\n"
    "We are using a language model to create music by treating a musical standard MIDI a simple text, with tokens for note values, note duration, and separations to denote movement forward in time.\n"
)

# Heading of the interface.
title = "\nClassical Music Generation\n"
# Input widgets, in the positional order of process_input's parameters:
# the uploaded MIDI file, the randomness slider and the duration choice.
input_components = [
    gr.inputs.File(label=midi_file_desc),
    gr.inputs.Slider(50, 250, default=100, step=50),
    gr.inputs.Radio([10, 20, 30], type="value", default=20),
]

# Example rows: [midi path, randomness, duration].
example_rows = [
    ['examples/mozart.midi', 100, 10],
    ['examples/beethoven.midi', 50, 30],
    ['examples/chopin.midi', 100, 20],
]

iface = gr.Interface(
    fn=process_input,
    inputs=input_components,
    outputs=["audio", "file"],
    title=title,
    article=article,
    examples=example_rows,
)

# Start serving the interface.
iface.launch()