shethjenil committed on
Commit
ae92f3c
·
verified ·
1 Parent(s): 6297a41

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +170 -169
  3. packages.txt +1 -0
  4. requirements.txt +2 -0
  5. violin-guitar.sf2 +3 -0
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  viola.sf2 filter=lfs diff=lfs merge=lfs -text
37
  instrument.sf2 filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  viola.sf2 filter=lfs diff=lfs merge=lfs -text
37
  instrument.sf2 filter=lfs diff=lfs merge=lfs -text
38
+ violin-guitar.sf2 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,169 +1,170 @@
1
- # from midi2audio import FluidSynth
2
- # from pydub import AudioSegment
3
- # import os
4
- # def midi_audio_mix_export(audio_path, midi_path, instrument_audio_path="only_instrument.mp3", output_path="mix.mp3"):
5
- # temp_midi_audio = "temp_midi_output.wav"
6
- # fs = FluidSynth()
7
- # fs.midi_to_audio(midi_path, temp_midi_audio)
8
- # midi_audio = AudioSegment.from_file(temp_midi_audio)
9
- # input_audio = AudioSegment.from_file(audio_path)
10
- # midi_audio_boosted = midi_audio + 15
11
- # input_audio_reduced = input_audio - 15.5
12
- # min_len = min(len(midi_audio_boosted), len(input_audio_reduced))
13
- # midi_trimmed = midi_audio_boosted[:min_len]
14
- # audio_trimmed = input_audio_reduced[:min_len]
15
- # mixed_audio = audio_trimmed.overlay(midi_trimmed)
16
- # mixed_audio.export(output_path, format="mp3")
17
- # os.remove(temp_midi_audio)
18
- # midi_audio_boosted.export(instrument_audio_path, format="mp3")
19
- # return output_path , instrument_audio_path, midi_path
20
-
21
-
22
- from audio2midi.basic_pitch_pitch_detector import BasicPitch , model_output_to_notes
23
- from torch import device as Device
24
- import gradio as gr
25
- import pretty_midi_fix
26
-
27
-
28
-
29
-
30
- def merge_midis(midi1: pretty_midi_fix.PrettyMIDI, midi2: pretty_midi_fix.PrettyMIDI,concatenate: bool = False):
31
- if concatenate:
32
- # Offset midi2 so it starts after midi1 ends
33
- offset = midi1.get_end_time()
34
- for instrument in midi2.instruments:
35
- for note in instrument.notes:
36
- note.start += offset
37
- note.end += offset
38
- for bend in instrument.pitch_bends:
39
- bend.time += offset
40
- for cc in instrument.control_changes:
41
- cc.time += offset
42
-
43
- # Merge instruments
44
- merged = pretty_midi_fix.PrettyMIDI()
45
- merged.instruments.extend(midi1.instruments)
46
- merged.instruments.extend(midi2.instruments)
47
-
48
- # Merge global metadata (tempo, time signatures, key signatures)
49
- merged.time_signature_changes.extend(midi1.time_signature_changes + midi2.time_signature_changes)
50
- merged.key_signature_changes.extend(midi1.key_signature_changes + midi2.key_signature_changes)
51
- merged._tick_scales = midi1._tick_scales if midi1._tick_scales else midi2._tick_scales
52
-
53
- # Tempo changes
54
- for tempo_change in midi1.get_tempo_changes()[1]:
55
- pass # Usually keep first tempo map; advanced merging requires remapping
56
- return merged
57
-
58
-
59
- def quantize(input, quantization='1/8T'):
60
- if quantization not in ['1/4', '1/8', '1/16', '1/32', '1/4T', '1/8T', '1/16T', '1/32T']:
61
- return input
62
- q_map = {
63
- "1/4": 1/1,
64
- "1/8": 1/2,
65
- "1/16": 1/4,
66
- "1/32": 1/8,
67
- "1/4T": 1/1.5,
68
- "1/8T": 1/3,
69
- "1/16T": 1/6,
70
- "1/32T": 1/12
71
- }
72
- input.instruments[0].program = 24
73
- tempo_changes, tempi = input.get_tempo_changes()
74
- default_tempo = tempi[0]
75
- spqn = 60.0 / default_tempo
76
- step = spqn * q_map[quantization]
77
- for instrument in input.instruments:
78
- for note in instrument.notes:
79
- note.start = round(note.start / step) * step
80
- note.end = max(note.start + 0.05, round(note.end / step) * step) # avoid zero-length notes
81
- for instrument in input.instruments:
82
- for cc in instrument.control_changes:
83
- cc.time = round(cc.time / step) * step
84
- return input
85
-
86
-
87
- device = Device('cuda' if Device.type == 'cuda' else 'cpu')
88
- pitch_detector = BasicPitch(device=device)
89
-
90
-
91
- def clone_midi(midi_obj):
92
- """Create a deep copy of a PrettyMIDI object (instruments, notes, CCs, etc.)."""
93
- new_midi = pretty_midi_fix.PrettyMIDI()
94
- # Copy metadata
95
- new_midi.time_signature_changes = list(midi_obj.time_signature_changes)
96
- new_midi.key_signature_changes = list(midi_obj.key_signature_changes)
97
- new_midi._tick_scales = list(midi_obj._tick_scales)
98
- # Copy instruments and their contents
99
- for instr in midi_obj.instruments:
100
- new_instr = pretty_midi_fix.Instrument(program=instr.program, is_drum=instr.is_drum, name=instr.name)
101
- new_instr.notes = [pretty_midi_fix.Note(n.velocity, n.pitch, n.start, n.end) for n in instr.notes]
102
- new_instr.pitch_bends = [pretty_midi_fix.PitchBend(pb.pitch, pb.time) for pb in instr.pitch_bends]
103
- new_instr.control_changes = [pretty_midi_fix.ControlChange(cc.number, cc.value, cc.time) for cc in instr.control_changes]
104
- new_midi.instruments.append(new_instr)
105
- return new_midi
106
-
107
-
108
- import gradio as gr
109
- import pretty_midi_fix
110
-
111
- def run_inference(audio_file, progress=gr.Progress()):
112
- if not audio_file:
113
- return None, "No audio file provided."
114
- try:
115
- result = pitch_detector.run_inference(audio_file, lambda x, y:progress((x, y)))
116
- return result, "Inference completed!"
117
- except Exception as e:
118
- return None, f"Error: {str(e)}"
119
-
120
- def generate_midi(cached_result, onset_thresh, frame_thresh, min_note_len, midi_tempo,
121
- infer_onsets, include_pitch_bends, melodia_trick, quantize_midi):
122
- if cached_result is None:
123
- gr.Warning("No inference results found. Please run inference first.")
124
- return None
125
-
126
- # Generate original MIDI (Violin)
127
- notes_violin = model_output_to_notes(
128
- cached_result, onset_thresh, frame_thresh,infer_onsets, min_note_len,
129
- None, None, include_pitch_bends, False, melodia_trick, midi_tempo
130
- )
131
- for inst in notes_violin.instruments:
132
- inst.program = 40 # Violin
133
-
134
- # Clone for Guitar
135
- notes_guitar = clone_midi(notes_violin)
136
- notes_guitar = quantize(notes_guitar, quantize_midi)
137
- for inst in notes_guitar.instruments:
138
- inst.program = 24 # Nylon Guitar
139
- inst.pitch_bends = [] # Remove pitch bends for guitar
140
-
141
- # Merge
142
- merged = merge_midis(notes_violin, notes_guitar)
143
- merged.write("output.mid")
144
- return "output.mid"
145
-
146
- with gr.Blocks() as demo:
147
- audio_input = gr.Audio(type="filepath", label="Upload Audio")
148
- run_btn = gr.Button("Run Model Inference")
149
- state_output = gr.State()
150
- status_output = gr.Textbox(label="Status")
151
- run_btn.click(run_inference, inputs=[audio_input], outputs=[state_output, status_output])
152
-
153
- with gr.Accordion("MIDI Settings", open=False):
154
- onset_thresh = gr.Slider(0, 1, 0, step=0.01, label="Onset Threshold")
155
- frame_thresh = gr.Slider(0, 1, 0.3, step=0.01, label="Frame Threshold")
156
- min_note_len = gr.Slider(1, 50, 11, step=1, label="Minimum Note Length (frames)")
157
- midi_tempo = gr.Slider(30, 240, 120, step=1, label="MIDI Tempo (BPM)")
158
- infer_onsets = gr.Checkbox(value=True, label="Infer Onsets")
159
- include_pitch_bends = gr.Checkbox(value=True, label="Include Pitch Bends")
160
- melodia_trick = gr.Checkbox(value=True, label="Use Melodia Trick")
161
- quantize_midi = gr.Dropdown(['1/4', '1/8', '1/16', '1/32', '1/4T', '1/8T', '1/16T', '1/32T', 'normal'], value="1/8T", label="Quantize MIDI")
162
-
163
- generate_btn = gr.Button("Generate MIDI")
164
- midi_output = gr.File(label="Generated MIDI", file_types=[".mid"])
165
- generate_btn.click(generate_midi,[state_output, onset_thresh, frame_thresh, min_note_len, midi_tempo,infer_onsets, include_pitch_bends, melodia_trick, quantize_midi],midi_output)
166
-
167
- demo.queue(max_size=1,default_concurrency_limit=1).launch()
168
-
169
-
 
 
1
+ from midi2audio import FluidSynth
2
+ from pydub import AudioSegment
3
+ import os
4
+ from audio2midi.basic_pitch_pitch_detector import BasicPitch, model_output_to_notes
5
+ from torch import device as Device
6
+ import gradio as gr
7
+ import pretty_midi_fix
8
+
9
+
10
def midi_audio_mix_export(audio_path, midi_path, instrument_audio_path="only_instrument.mp3", output_path="mix.mp3", sf_file="violin-guitar.sf2"):
    """Render *midi_path* to audio with FluidSynth and mix it over *audio_path*.

    The synthesized instrument track is boosted by +15 dB, the original audio is
    attenuated by -15.5 dB, both are trimmed to the shorter of the two, and the
    overlay is exported as MP3.

    Args:
        audio_path: source audio file to mix under the instrument track.
        midi_path: MIDI file to synthesize.
        instrument_audio_path: output path for the instrument-only MP3.
        output_path: output path for the mixed MP3.
        sf_file: SoundFont file used by FluidSynth for synthesis.

    Returns:
        Tuple of (output_path, instrument_audio_path, midi_path).
    """
    temp_midi_audio = "temp_midi_output.wav"
    fs = FluidSynth(sf_file)
    fs.midi_to_audio(midi_path, temp_midi_audio)
    try:
        midi_audio = AudioSegment.from_file(temp_midi_audio)
        input_audio = AudioSegment.from_file(audio_path)
        midi_audio_boosted = midi_audio + 15  # pydub +/- applies gain in dB
        input_audio_reduced = input_audio - 15.5
        # Trim both tracks to the shorter length so the overlay lines up.
        min_len = min(len(midi_audio_boosted), len(input_audio_reduced))
        midi_trimmed = midi_audio_boosted[:min_len]
        audio_trimmed = input_audio_reduced[:min_len]
        mixed_audio = audio_trimmed.overlay(midi_trimmed)
        mixed_audio.export(output_path, format="mp3")
        midi_audio_boosted.export(instrument_audio_path, format="mp3")
    finally:
        # Always remove the intermediate WAV, even if decoding/export raised
        # (the original leaked it on any pydub failure).
        if os.path.exists(temp_midi_audio):
            os.remove(temp_midi_audio)
    return output_path, instrument_audio_path, midi_path
26
+
27
+
28
def merge_midis(midi1: pretty_midi_fix.PrettyMIDI, midi2: pretty_midi_fix.PrettyMIDI, concatenate: bool = False):
    """Combine two PrettyMIDI objects into a single one.

    When *concatenate* is True, every timed event in *midi2* is shifted so it
    starts after *midi1* ends; otherwise both play simultaneously. Instrument
    objects are shared (not copied) into the merged result.
    """
    if concatenate:
        shift = midi1.get_end_time()
        # Push all of midi2's events past the end of midi1.
        for track in midi2.instruments:
            for note in track.notes:
                note.start += shift
                note.end += shift
            for bend in track.pitch_bends:
                bend.time += shift
            for cc in track.control_changes:
                cc.time += shift
    merged = pretty_midi_fix.PrettyMIDI()
    merged.instruments = midi1.instruments + midi2.instruments
    merged.time_signature_changes = list(midi1.time_signature_changes) + list(midi2.time_signature_changes)
    merged.key_signature_changes = list(midi1.key_signature_changes) + list(midi2.key_signature_changes)
    # Prefer midi1's tick scales; fall back to midi2's when midi1 has none.
    merged._tick_scales = midi1._tick_scales or midi2._tick_scales
    return merged
46
+
47
+
48
def quantize(input, quantization='1/8T'):
    """Snap note and control-change times in *input* onto a rhythmic grid.

    Args:
        input: PrettyMIDI object; modified in place.
        quantization: grid size relative to a quarter note ("T" variants are
            triplet grids). Any unrecognized value (e.g. "not using") returns
            *input* untouched.

    Returns:
        The same *input* object, quantized in place.
    """
    q_map = {
        "1/4": 1 / 1,
        "1/8": 1 / 2,
        "1/16": 1 / 4,
        "1/32": 1 / 8,
        "1/4T": 1 / 1.5,
        "1/8T": 1 / 3,
        "1/16T": 1 / 6,
        "1/32T": 1 / 12,
    }
    if quantization not in q_map:
        return input
    # NOTE(review): forcing program 24 (nylon guitar) on the first instrument
    # looks like a leftover side effect; callers re-set programs afterwards.
    # Kept for behavior compatibility — confirm whether it can be dropped.
    input.instruments[0].program = 24
    _, tempi = input.get_tempo_changes()
    # Guard against an empty tempo map (the original raised IndexError);
    # 120 BPM is the conventional MIDI default.
    default_tempo = tempi[0] if len(tempi) else 120.0
    spqn = 60.0 / default_tempo  # seconds per quarter note
    step = spqn * q_map[quantization]
    for instrument in input.instruments:
        for note in instrument.notes:
            note.start = round(note.start / step) * step
            # Enforce a 50 ms minimum so quantization never yields zero-length notes.
            note.end = max(note.start + 0.05, round(note.end / step) * step)
        for cc in instrument.control_changes:
            cc.time = round(cc.time / step) * step
    return input
74
+
75
+
76
# Pick CUDA only when a GPU is actually available. The original compared
# `Device.type == 'cuda'` — but `Device` is the torch.device *class*, whose
# `type` attribute is a descriptor, so the comparison was always False and
# the app silently ran on CPU even with a GPU present.
from torch.cuda import is_available as _cuda_is_available

device = Device('cuda' if _cuda_is_available() else 'cpu')
pitch_detector = BasicPitch(device=device)
78
+
79
+
80
def clone_midi(midi_obj):
    """Deep-copy a PrettyMIDI object: metadata, instruments, notes, bends, CCs."""
    duplicate = pretty_midi_fix.PrettyMIDI()
    # Shallow-copy the metadata lists so the clone owns its own containers.
    duplicate.time_signature_changes = list(midi_obj.time_signature_changes)
    duplicate.key_signature_changes = list(midi_obj.key_signature_changes)
    duplicate._tick_scales = list(midi_obj._tick_scales)
    # Rebuild every instrument with fresh event objects so later edits to the
    # clone (quantization, program changes) never touch the original.
    for src in midi_obj.instruments:
        dst = pretty_midi_fix.Instrument(program=src.program, is_drum=src.is_drum, name=src.name)
        dst.notes = [pretty_midi_fix.Note(n.velocity, n.pitch, n.start, n.end) for n in src.notes]
        dst.pitch_bends = [pretty_midi_fix.PitchBend(pb.pitch, pb.time) for pb in src.pitch_bends]
        dst.control_changes = [pretty_midi_fix.ControlChange(cc.number, cc.value, cc.time) for cc in src.control_changes]
        duplicate.instruments.append(dst)
    return duplicate
92
+
93
+
94
def run_inference(audio_file, progress=gr.Progress()):
    """Run pitch detection on *audio_file*, reporting progress to Gradio.

    Returns a (model_output, status_message) pair; the model output is None on
    failure so downstream steps can detect the missing result.
    """
    if not audio_file:
        return None, "No audio file provided."

    def report(done, total):
        # Forward the detector's progress callbacks into the Gradio bar.
        progress((done, total))

    try:
        model_output = pitch_detector.run_inference(audio_file, report)
    except Exception as e:
        return None, f"Error: {str(e)}"
    return model_output, "Inference completed!"
102
+
103
+
104
def generate_midi(audio_file, cached_result, onset_thresh, frame_thresh, min_note_len, midi_tempo,
                  infer_onsets, include_pitch_bends, melodia_trick, quantize_midi):
    """Turn cached model output into a MIDI file plus rendered and mixed audio.

    A violin track is always produced; when *quantize_midi* names a grid, a
    quantized nylon-guitar copy is layered on top before export. Returns the
    MIDI path, the instrument-only audio path, and the mixed audio path.
    """
    if cached_result is None:
        gr.Warning("No inference results found. Please run inference first.")
        return None, None, None

    violin = model_output_to_notes(
        cached_result, onset_thresh, frame_thresh, infer_onsets, min_note_len,
        None, None, include_pitch_bends, False, melodia_trick, midi_tempo
    )
    for track in violin.instruments:
        track.program = 40  # GM program 40 = Violin

    if quantize_midi == "not using":
        final_midi = violin  # violin only, no quantized layer
    else:
        guitar = quantize(clone_midi(violin), quantize_midi)
        for track in guitar.instruments:
            track.program = 24  # GM program 24 = Nylon Guitar
            track.pitch_bends = []  # the guitar copy carries no pitch bends
        final_midi = merge_midis(violin, guitar)

    final_midi.write("output.mid")
    # Synthesize the MIDI and mix it under the uploaded audio.
    mix_audio, instrument_audio, _ = midi_audio_mix_export(audio_file, "output.mid")
    return "output.mid", instrument_audio, mix_audio
135
+
136
+
137
# ---- Gradio UI ----
with gr.Blocks() as demo:
    # Step 1: upload audio and run the (slow) model once. The raw model
    # output is cached in gr.State so MIDI settings can be re-applied cheaply.
    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    run_btn = gr.Button("Run Model Inference")
    state_output = gr.State()
    status_output = gr.Textbox(label="Status")
    run_btn.click(run_inference, inputs=[audio_input], outputs=[state_output, status_output])

    # Step 2: post-processing knobs for turning cached output into MIDI.
    with gr.Accordion("MIDI Settings", open=False):
        onset_thresh = gr.Slider(0, 1, 0, step=0.01, label="Onset Threshold")
        frame_thresh = gr.Slider(0, 1, 0.3, step=0.01, label="Frame Threshold")
        min_note_len = gr.Slider(1, 50, 11, step=1, label="Minimum Note Length (frames)")
        midi_tempo = gr.Slider(30, 240, 120, step=1, label="MIDI Tempo (BPM)")
        infer_onsets = gr.Checkbox(value=True, label="Infer Onsets")
        include_pitch_bends = gr.Checkbox(value=True, label="Include Pitch Bends")
        melodia_trick = gr.Checkbox(value=True, label="Use Melodia Trick")
        quantize_midi = gr.Dropdown(
            ['1/4', '1/8', '1/16', '1/32', '1/4T', '1/8T', '1/16T', '1/32T', 'not using'],
            value="not using",
            label="Quantize MIDI",
        )

    # Step 3: generate the MIDI file plus instrument-only and mixed audio.
    generate_btn = gr.Button("Generate MIDI & Audio")
    midi_output = gr.File(label="Generated MIDI", file_types=[".mid"])
    instrument_output = gr.Audio(label="Instrument Only Audio")
    mix_output = gr.Audio(label="Mixed Audio (Original + Instrument)")
    generate_btn.click(
        generate_midi,
        [audio_input, state_output, onset_thresh, frame_thresh, min_note_len, midi_tempo,
         infer_onsets, include_pitch_bends, melodia_trick, quantize_midi],
        [midi_output, instrument_output, mix_output],
    )

# Single-slot queue: inference and synthesis are heavy, so serialize requests.
demo.queue(max_size=1, default_concurrency_limit=1).launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ fluidsynth
requirements.txt CHANGED
@@ -1 +1,3 @@
 
 
1
  audio2midi[basic_pitch_pitch_detector]
 
1
+ midi2audio
2
+ pydub
3
  audio2midi[basic_pitch_pitch_detector]
violin-guitar.sf2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06723c530ee4cde9048d66f60eacfce27708d350ab92d6d444b54e9120dcfe15
3
+ size 29233526