# Source: Hugging Face Space "app.py" by Aliwan (commit 72e92ef, verified)
"""
Basic Pitch Audio-to-MIDI Converter
Hugging Face Space for CPU inference
Made with copy-paste; AI-assisted.
"""
import gradio as gr
import numpy as np
from basic_pitch.inference import predict
from basic_pitch import ICASSP_2022_MODEL_PATH
import tempfile
import os
from pathlib import Path
def transcribe_audio(audio_input):
    """
    Transcribe audio to MIDI using the Basic Pitch model.

    Args:
        audio_input: Either a file path (str) or a tuple
            (sample_rate, audio_array) as produced by ``gr.Audio``.
            May be None when the user submits without providing audio.

    Returns:
        Tuple of (midi_file_path, note_info):
            midi_file_path: Path to the generated MIDI file, or None on error.
            note_info: Human-readable note summary, or an error message.
    """
    # Gradio passes None when no audio was uploaded/recorded.
    if audio_input is None:
        return None, "No audio provided. Please upload or record audio first."

    tmp_input_path = None
    try:
        # Handle different input formats.
        if isinstance(audio_input, tuple):
            sample_rate, audio_array = audio_input
            # Write the raw array to a temporary WAV so predict() can read it.
            # delete=False because predict() reopens the file by path; we
            # remove it ourselves in the finally block below.
            import soundfile as sf
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp_input_path = tmp.name
            sf.write(tmp_input_path, audio_array, sample_rate)
            audio_path = tmp_input_path
        else:
            audio_path = audio_input

        # Run inference.
        model_output, midi_data, note_events = predict(
            audio_path,
            model_or_model_path=ICASSP_2022_MODEL_PATH,
            onset_thresh=0.5,
            frame_thresh=0.3,
            # Minimum note length in milliseconds (Basic Pitch's default value).
            minimum_note_length=127.70254248031496,
            minimum_frequency=10,
            maximum_frequency=2000,
            melodia_trick=True,
            sonify=False,
        )

        # Save the MIDI to a unique temporary file so concurrent requests
        # don't clobber each other's output (a fixed path would collide).
        with tempfile.NamedTemporaryFile(suffix=".mid", delete=False) as out:
            midi_output_path = out.name
        midi_data.write(midi_output_path)

        # Format note events for display.
        note_info = format_note_events(note_events)
        return midi_output_path, note_info
    except Exception as e:
        # Surface the failure in the UI rather than crashing the request.
        return None, f"Error during transcription: {str(e)}"
    finally:
        # Clean up the temporary WAV created for array input, if any.
        if tmp_input_path is not None:
            try:
                os.remove(tmp_input_path)
            except OSError:
                pass
def format_note_events(note_events):
    """
    Render detected note events as a human-readable table.

    Args:
        note_events: Sequence of (start_time, end_time, pitch_midi,
            amplitude, pitch_bends) tuples.

    Returns:
        A formatted multi-line string, or a placeholder when no notes
        were detected.
    """
    if not note_events:
        return "No notes detected."

    divider = "-" * 60 + "\n"
    pieces = [
        "Detected Notes:\n",
        divider,
        f"{'Start (s)':<12} {'End (s)':<12} {'MIDI Pitch':<12} {'Amplitude':<12}\n",
        divider,
    ]
    for onset, offset, midi_pitch, amp, _bends in note_events:
        pieces.append(f"{onset:<12.3f} {offset:<12.3f} {midi_pitch:<12} {amp:<12.3f}\n")
    pieces.append(divider)
    pieces.append(f"Total notes detected: {len(note_events)}")
    return "".join(pieces)
def create_interface():
    """
    Create Gradio interface for Basic Pitch.

    Returns:
        The assembled ``gr.Blocks`` demo, ready for ``.launch()``.
    """
    with gr.Blocks(title="Basic Pitch - Audio to MIDI") as demo:
        # Intro banner: model description and supported formats.
        gr.Markdown("""
        # 🎵 Basic Pitch: Audio-to-MIDI Transcription
        Convert audio files to MIDI using Spotify's lightweight neural network model.
        **Supported formats:** `.mp3`, `.wav`, `.ogg`, `.flac`, `.m4a`
        **Features:**
        - Automatic music transcription (AMT)
        - Polyphonic note detection
        - Pitch bend estimation
        - Instrument-agnostic (works with vocals, strings, brass, etc.)
        **Note:** Works best with single-instrument audio. Mono audio is recommended.
        """)
        with gr.Row():
            # Left column: audio upload/record plus the submit button.
            with gr.Column():
                gr.Markdown("### Input")
                # type="filepath" makes Gradio pass a path string to the
                # handler (matching the non-tuple branch in transcribe_audio).
                audio_input = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"]
                )
                submit_btn = gr.Button(
                    "🎼 Transcribe to MIDI",
                    variant="primary"
                )
            # Right column: downloadable MIDI file and a text summary of notes.
            with gr.Column():
                gr.Markdown("### Output")
                midi_output = gr.File(
                    label="Download MIDI File",
                    type="filepath"
                )
                note_events_output = gr.Textbox(
                    label="Detected Notes",
                    lines=10,
                    interactive=False
                )
        # Static documentation of the (fixed) inference parameters.
        # NOTE(review): these values mirror the hard-coded arguments passed to
        # predict() in transcribe_audio — keep the two in sync.
        gr.Markdown("""
        ### Parameters
        - **Onset Threshold:** Minimum amplitude for onset detection (0.5)
        - **Frame Threshold:** Minimum amplitude for note frames (0.3)
        - **Melodia Trick:** Post-processing for melody extraction (enabled)
        - **Frequency Range:** 10 Hz - 2000 Hz
        """)
        gr.Markdown("""
        ### Tips
        1. **Best results:** Single instrument, mono audio, clear recordings
        2. **Audio quality:** Higher quality audio produces better transcriptions
        3. **Duration:** Works with any length, but longer files take more time
        4. **Polyphonic:** Detects multiple simultaneous notes
        ### About Basic Pitch
        Developed by Spotify's Audio Intelligence Lab. See the [GitHub repo](https://github.com/spotify/basic-pitch) for more info.
        """)
        # Connect submit button: one handler, two outputs (file + text).
        submit_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=[midi_output, note_events_output]
        )
    return demo
if __name__ == "__main__":
demo = create_interface()
demo.launch()