Aliwan committed on
Commit
5e5a371
·
verified ·
1 Parent(s): 0467922

Upload 3 files

Browse files
Files changed (3) hide show
  1. ReadMe.md.txt +36 -0
  2. python.py.txt +227 -0
  3. requirements.txt.txt +8 -0
ReadMe.md.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Basic Pitch - Audio to MIDI
2
+
3
+ A Hugging Face Space for converting audio files to MIDI using Spotify's Basic Pitch model.
4
+
5
+ ## Features
6
+
7
+ - **Automatic Music Transcription (AMT):** Converts audio to MIDI notation
8
+ - **Instrument Agnostic:** Works with vocals, strings, brass, woodwinds, etc.
9
+ - **CPU Optimized:** Lightweight model designed for CPU inference
10
+ - **Polyphonic Detection:** Detects multiple simultaneous notes
11
+ - **Easy to Use:** Simple Gradio web interface
12
+
13
+ ## How to Use
14
+
15
+ 1. Upload an audio file (`.wav`, `.mp3`, `.ogg`, `.flac`, `.m4a`)
16
+ 2. Click "Transcribe to MIDI"
17
+ 3. Download the resulting MIDI file
18
+
19
+ ## Model Information
20
+
21
+ - **Model:** ICASSP 2022 (Spotify Basic Pitch)
22
+ - **Size:** ~20 MB
23
+ - **Inference Time:** ~1-2 seconds per minute of audio (CPU)
24
+ - **Hardware:** No GPU required
25
+
26
+ ## Best Practices
27
+
28
+ - Use mono audio for best results
29
+ - Avoid heavy background noise
30
+ - Works best with single instruments
31
+ - Clear, high-quality recordings produce better results
32
+
33
+ ## References
34
+
35
+ - [GitHub Repository](https://github.com/spotify/basic-pitch)
36
+ - [Paper](https://arxiv.org/abs/2209.00799)
python.py.txt ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Basic Pitch Audio-to-MIDI Converter
3
+ Hugging Face Space for CPU inference
4
+ July 2024 version
5
+ """
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ from basic_pitch.inference import predict
10
+ from basic_pitch import ICASSP_2022_MODEL_PATH
11
+ import tempfile
12
+ import os
13
+
14
+
15
def transcribe_audio(audio_input):
    """
    Transcribe audio to MIDI using the Basic Pitch model.

    Args:
        audio_input: Tuple of (sample_rate, audio_array) from the Gradio
            Audio component (type="numpy"), or None when nothing uploaded.

    Returns:
        midi_file_path: Path to the generated MIDI file (a persistent
            temp file, so it survives past this call for Gradio to serve),
            or None on error.
        note_summary: Human-readable summary of detected notes, or an
            error message string.
    """
    try:
        if audio_input is None:
            return None, "Please upload an audio file first."

        sample_rate, audio_data = audio_input

        # Temporary work dir holds only the intermediate WAV; it is
        # deleted when the `with` block exits.
        with tempfile.TemporaryDirectory() as tmpdir:
            audio_path = os.path.join(tmpdir, "input_audio.wav")

            import soundfile as sf
            sf.write(audio_path, audio_data, sample_rate)

            # Run Basic Pitch inference.
            # BUG FIX: basic_pitch.inference.predict takes
            # `onset_threshold` / `frame_threshold`; the original
            # `onset_thresh`/`frame_thresh` and the non-existent
            # `sonify` kwarg made this call raise TypeError.
            model_output, midi_data, note_events = predict(
                audio_path,
                model_or_model_path=ICASSP_2022_MODEL_PATH,
                onset_threshold=0.5,
                frame_threshold=0.3,
                minimum_note_length=127.70254248031496,
                minimum_frequency=10,
                maximum_frequency=2000,
                melodia_trick=True,
            )

            # BUG FIX: the MIDI file must outlive the TemporaryDirectory.
            # The original wrote it inside `tmpdir`, so the returned path
            # was already deleted by the time Gradio tried to serve it.
            midi_fd, midi_path = tempfile.mkstemp(suffix=".mid")
            os.close(midi_fd)  # midi_data.write() reopens by path

            midi_data.write(midi_path)

            note_summary = generate_note_summary(note_events)

        return midi_path, note_summary

    except Exception as e:
        # UI boundary: surface the error in the textbox instead of
        # crashing the Gradio worker.
        return None, f"Error: {str(e)}"
64
+
65
+
66
def generate_note_summary(note_events):
    """
    Build a human-readable report for a list of detected note events.

    Args:
        note_events: List of tuples
            (start_time, end_time, pitch_midi, amplitude, pitch_bends).

    Returns:
        Formatted multi-line string summary (or a "no notes" message
        when the list is empty or None).
    """
    if not note_events or len(note_events) == 0:
        return "No notes detected in the audio."

    rule = "-" * 70 + "\n"
    header = (
        f"{'Start (s)':<12} {'End (s)':<12} {'MIDI':<8} "
        f"{'Duration':<12} {'Amplitude':<12}\n"
    )

    # Assemble pieces in a list and join once at the end.
    parts = [
        "✓ Transcription Complete\n",
        f"Total notes detected: {len(note_events)}\n\n",
        "Note Events:\n",
        rule,
        header,
        rule,
    ]

    for onset, offset, pitch, amp, _bends in note_events:
        parts.append(
            f"{onset:<12.3f} {offset:<12.3f} {pitch:<8} "
            f"{offset - onset:<12.3f} {amp:<12.3f}\n"
        )

    parts.append(rule)

    # Aggregate statistics over all detected notes.
    durations = [offset - onset for onset, offset, _, _, _ in note_events]
    amplitudes = [amp for _, _, _, amp, _ in note_events]

    parts.append("\nStatistics:\n")
    parts.append(f"Average note duration: {np.mean(durations):.3f}s\n")
    parts.append(f"Average amplitude: {np.mean(amplitudes):.3f}\n")

    return "".join(parts)
101
+
102
+
103
def create_gradio_interface():
    """
    Create the Gradio Blocks interface for Basic Pitch transcription.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """

    with gr.Blocks(title="Basic Pitch - Audio to MIDI") as demo:

        # Page header / intro text.
        gr.Markdown("""
        # 🎵 Basic Pitch: Automatic Music Transcription

        Convert audio files to MIDI notation using Spotify's **Basic Pitch** model.

        This lightweight neural network performs **automatic music transcription (AMT)**
        and works with any instrument or voice.
        """)

        with gr.Row():
            # Left column: audio upload plus the action button.
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Input")

                audio_input = gr.Audio(
                    label="Upload Audio File",
                    type="numpy",  # delivers (sample_rate, np.ndarray) to the callback
                    sources=["upload", "microphone"]
                )

                gr.Markdown("""
                **Supported formats:**
                - `.wav`, `.mp3`, `.ogg`, `.flac`, `.m4a`

                **Recommended:**
                - Mono audio (single instrument)
                - Clear, high-quality recordings
                - 30 seconds to 5 minutes duration
                """)

                transcribe_btn = gr.Button(
                    "🎼 Transcribe to MIDI",
                    variant="primary",
                    size="lg"
                )

            # Right column: downloadable MIDI file and text summary.
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Output")

                midi_file = gr.File(
                    label="Download MIDI",
                    type="filepath"  # component receives a path string from the callback
                )

                note_info = gr.Textbox(
                    label="Note Detection Summary",
                    lines=15,
                    interactive=False,
                    max_lines=20
                )

        # Static documentation sections below the main controls.
        gr.Markdown("""
        ---
        ### ⚙️ Model Details

        **Model:** ICASSP 2022 (Spotify Basic Pitch)
        - Lightweight: ~20 MB
        - CPU-optimized inference
        - No GPU required

        **Detection Parameters:**
        - Onset threshold: 0.5 (note attack sensitivity)
        - Frame threshold: 0.3 (note sustain sensitivity)
        - Frequency range: 10 Hz - 2000 Hz
        - Melodia post-processing: Enabled

        **Output:**
        - MIDI file with detected notes
        - Note timing and pitch information
        - Amplitude/velocity data
        """)

        gr.Markdown("""
        ---
        ### 💡 Tips for Best Results

        1. **Single instrument:** Works best with one instrument or voice
        2. **Mono audio:** Use mono recordings when possible
        3. **Clear audio:** Avoid background noise
        4. **Duration:** Works with any length, but 30s-5min is typical
        5. **Polyphonic:** Can detect multiple simultaneous notes

        **Limitations:**
        - Works best with pitched instruments (not drums)
        - May struggle with very fast passages
        - Polyphonic music may need manual correction
        """)

        gr.Markdown("""
        ---
        ### 📚 About Basic Pitch

        Developed by [Spotify's Audio Intelligence Lab](https://github.com/spotify/basic-pitch)

        **Citation:**
        ```
        Basic Pitch: A Lightweight Yet Effective Pitch Detection Model
        for Automatic Music Transcription
        Spotify, 2022
        ```
        """)

        # Connect button to function
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=[midi_file, note_info]
        )

    return demo
219
+
220
+
221
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at the standard
    # Hugging Face Spaces port (7860), without a public share link.
    app = create_gradio_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)
requirements.txt.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ basic-pitch==0.3.13
2
+ gradio==4.18.0
3
+ librosa==0.10.0
4
+ numpy==1.24.3
5
+ pretty-midi==0.2.10
6
+ scipy==1.11.0
7
+ soundfile==0.12.1
8
+ resampy==0.4.2