# NOTE(review): the three lines here ("Spaces:", "Build error" x2) were
# build-log residue from the hosting page, not part of the program; kept
# only as a comment so the module remains valid Python.
"""
Basic Pitch Audio-to-MIDI Converter.

Hugging Face Space for CPU inference.
Written by copy-paste with AI assistance.
"""
import os
import tempfile
from pathlib import Path

import gradio as gr
import numpy as np
from basic_pitch import ICASSP_2022_MODEL_PATH
from basic_pitch.inference import predict
def transcribe_audio(audio_input):
    """
    Transcribe audio to MIDI using the Basic Pitch model.

    Args:
        audio_input: Either a path to an audio file (str) or a tuple
            ``(sample_rate, audio_array)`` as delivered by a Gradio
            Audio component.

    Returns:
        tuple: ``(midi_file_path, note_info)`` — the path to the
        generated MIDI file (``None`` on failure) and a human-readable
        summary of detected notes (or an error message).
    """
    tmp_wav_path = None
    try:
        # Gradio may hand us either a file path or raw (rate, samples) data.
        if isinstance(audio_input, tuple):
            sample_rate, audio_array = audio_input
            import soundfile as sf
            # Reserve a unique path, then write AFTER the handle is closed
            # (writing through a still-open NamedTemporaryFile fails on
            # Windows and is fragile elsewhere).
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp_wav_path = tmp.name
            sf.write(tmp_wav_path, audio_array, sample_rate)
            audio_path = tmp_wav_path
        else:
            audio_path = audio_input

        # Run inference with the pretrained ICASSP 2022 model.
        model_output, midi_data, note_events = predict(
            audio_path,
            model_or_model_path=ICASSP_2022_MODEL_PATH,
            onset_thresh=0.5,
            frame_thresh=0.3,
            # Milliseconds; this is Basic Pitch's documented default
            # minimum note length (~11 frames) — TODO confirm units.
            minimum_note_length=127.70254248031496,
            minimum_frequency=10,
            maximum_frequency=2000,
            melodia_trick=True,
            sonify=False,
        )

        # Write the MIDI to a unique persistent temp file. A unique name
        # (rather than a fixed "/tmp/basic_pitch_output.mid") prevents
        # concurrent requests from clobbering each other's output; the
        # previous extra write into a TemporaryDirectory was discarded
        # the moment the directory context exited, so it is dropped.
        with tempfile.NamedTemporaryFile(
            prefix="basic_pitch_", suffix=".mid", delete=False
        ) as midi_tmp:
            final_midi_path = midi_tmp.name
        midi_data.write(final_midi_path)

        return final_midi_path, format_note_events(note_events)
    except Exception as e:
        # Surface the failure to the UI instead of crashing the worker.
        return None, f"Error during transcription: {str(e)}"
    finally:
        # Remove the temporary WAV created for raw-array input (the
        # original leaked one file per request).
        if tmp_wav_path is not None:
            try:
                os.unlink(tmp_wav_path)
            except OSError:
                pass
def format_note_events(note_events):
    """
    Format note events into a readable, column-aligned text table.

    Args:
        note_events: List of ``(start_time, end_time, pitch_midi,
            amplitude, pitch_bends)`` tuples as returned by Basic Pitch.

    Returns:
        str: A formatted table of notes, or ``"No notes detected."``
        when the list is empty.
    """
    if not note_events:
        return "No notes detected."

    divider = "-" * 60 + "\n"
    # Accumulate pieces in a list and join once — repeated `+=` on a
    # string is quadratic in the number of notes.
    parts = [
        "Detected Notes:\n",
        divider,
        f"{'Start (s)':<12} {'End (s)':<12} {'MIDI Pitch':<12} {'Amplitude':<12}\n",
        divider,
    ]
    for start, end, pitch, amplitude, _ in note_events:
        parts.append(f"{start:<12.3f} {end:<12.3f} {pitch:<12} {amplitude:<12.3f}\n")
    parts.append(divider)
    parts.append(f"Total notes detected: {len(note_events)}")
    return "".join(parts)
def create_interface():
    """Build and return the Gradio Blocks UI for Basic Pitch."""
    with gr.Blocks(title="Basic Pitch - Audio to MIDI") as demo:
        # Header / feature overview.
        gr.Markdown("""
        # 🎵 Basic Pitch: Audio-to-MIDI Transcription
        Convert audio files to MIDI using Spotify's lightweight neural network model.
        **Supported formats:** `.mp3`, `.wav`, `.ogg`, `.flac`, `.m4a`
        **Features:**
        - Automatic music transcription (AMT)
        - Polyphonic note detection
        - Pitch bend estimation
        - Instrument-agnostic (works with vocals, strings, brass, etc.)
        **Note:** Works best with single-instrument audio. Mono audio is recommended.
        """)

        # Two-column layout: input controls on the left, results on the right.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Input")
                audio_in = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"],
                )
                transcribe_button = gr.Button(
                    "🎼 Transcribe to MIDI",
                    variant="primary",
                )
            with gr.Column():
                gr.Markdown("### Output")
                midi_file = gr.File(
                    label="Download MIDI File",
                    type="filepath",
                )
                notes_box = gr.Textbox(
                    label="Detected Notes",
                    lines=10,
                    interactive=False,
                )

        # Static documentation of the (fixed) inference parameters.
        gr.Markdown("""
        ### Parameters
        - **Onset Threshold:** Minimum amplitude for onset detection (0.5)
        - **Frame Threshold:** Minimum amplitude for note frames (0.3)
        - **Melodia Trick:** Post-processing for melody extraction (enabled)
        - **Frequency Range:** 10 Hz - 2000 Hz
        """)
        gr.Markdown("""
        ### Tips
        1. **Best results:** Single instrument, mono audio, clear recordings
        2. **Audio quality:** Higher quality audio produces better transcriptions
        3. **Duration:** Works with any length, but longer files take more time
        4. **Polyphonic:** Detects multiple simultaneous notes
        ### About Basic Pitch
        Developed by Spotify's Audio Intelligence Lab. See the [GitHub repo](https://github.com/spotify/basic-pitch) for more info.
        """)

        # Wire the button to the transcription callback.
        transcribe_button.click(
            fn=transcribe_audio,
            inputs=[audio_in],
            outputs=[midi_file, notes_box],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    create_interface().launch()