Upload 3 files

Browse files

Files changed (3) hide show

ReadMe.md.txt +36 -0
python.py.txt +227 -0
requirements.txt.txt +8 -0

ReadMe.md.txt ADDED Viewed

	@@ -0,0 +1,36 @@

+# Basic Pitch - Audio to MIDI
+A Hugging Face Space for converting audio files to MIDI using Spotify's Basic Pitch model.
+## Features
+- **Automatic Music Transcription (AMT):** Converts audio to MIDI notation
+- **Instrument Agnostic:** Works with vocals, strings, brass, woodwinds, etc.
+- **CPU Optimized:** Lightweight model designed for CPU inference
+- **Polyphonic Detection:** Detects multiple simultaneous notes
+- **Easy to Use:** Simple Gradio web interface
+## How to Use
+1. Upload an audio file (`.wav`, `.mp3`, `.ogg`, `.flac`, `.m4a`)
+2. Click "Transcribe to MIDI"
+3. Download the resulting MIDI file
+## Model Information
+- **Model:** ICASSP 2022 (Spotify Basic Pitch)
+- **Size:** ~20 MB
+- **Inference Time:** ~1-2 seconds per minute of audio (CPU)
+- **Hardware:** No GPU required
+## Best Practices
+- Use mono audio for best results
+- Avoid heavy background noise
+- Works best with single instruments
+- Clear, high-quality recordings produce better results
+## References
+- [GitHub Repository](https://github.com/spotify/basic-pitch)
+- [Paper](https://arxiv.org/abs/2203.09893)

python.py.txt ADDED Viewed

	@@ -0,0 +1,227 @@

+"""
+Basic Pitch Audio-to-MIDI Converter
+Hugging Face Space for CPU inference
+July 2024 version
+"""
+import gradio as gr
+import numpy as np
+from basic_pitch.inference import predict
+from basic_pitch import ICASSP_2022_MODEL_PATH
+import tempfile
+import os
+def transcribe_audio(audio_input):
+    """
+    Transcribe audio to MIDI using Basic Pitch model.
+    Args:
+        audio_input: Tuple of (sample_rate, audio_array) from Gradio Audio component
+    Returns:
+        midi_file_path: Path to generated MIDI file
+        note_summary: Summary of detected notes
+    """
+    try:
+        if audio_input is None:
+            return None, "Please upload an audio file first."
+        sample_rate, audio_data = audio_input
+        # Create temporary directory for processing
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Save audio to temporary file
+            audio_path = os.path.join(tmpdir, "input_audio.wav")
+            import soundfile as sf
+            sf.write(audio_path, audio_data, sample_rate)
+            # Run Basic Pitch inference
+            model_output, midi_data, note_events = predict(
+                audio_path,
+                model_or_model_path=ICASSP_2022_MODEL_PATH,
+                onset_thresh=0.5,
+                frame_thresh=0.3,
+                minimum_note_length=127.70254248031496,
+                minimum_frequency=10,
+                maximum_frequency=2000,
+                melodia_trick=True,
+                sonify=False
+            )
+            # Save MIDI output
+            midi_path = os.path.join(tmpdir, "output.mid")
+            midi_data.write(midi_path)
+            # Generate note summary
+            note_summary = generate_note_summary(note_events)
+            return midi_path, note_summary
+    except Exception as e:
+        return None, f"Error: {str(e)}"
+def generate_note_summary(note_events):
+    """
+    Generate a human-readable summary of detected notes.
+    Args:
+        note_events: List of tuples (start_time, end_time, pitch_midi, amplitude, pitch_bends)
+    Returns:
+        Formatted string summary
+    """
+    if not note_events or len(note_events) == 0:
+        return "No notes detected in the audio."
+    summary = f"✓ Transcription Complete\n"
+    summary += f"Total notes detected: {len(note_events)}\n\n"
+    summary += "Note Events:\n"
+    summary += "-" * 70 + "\n"
+    summary += f"{'Start (s)':<12} {'End (s)':<12} {'MIDI':<8} {'Duration':<12} {'Amplitude':<12}\n"
+    summary += "-" * 70 + "\n"
+    for start_time, end_time, midi_pitch, amplitude, pitch_bends in note_events:
+        duration = end_time - start_time
+        summary += f"{start_time:<12.3f} {end_time:<12.3f} {midi_pitch:<8} {duration:<12.3f} {amplitude:<12.3f}\n"
+    summary += "-" * 70 + "\n"
+    # Calculate statistics
+    avg_duration = np.mean([end - start for start, end, _, _, _ in note_events])
+    avg_amplitude = np.mean([amp for _, _, _, amp, _ in note_events])
+    summary += f"\nStatistics:\n"
+    summary += f"Average note duration: {avg_duration:.3f}s\n"
+    summary += f"Average amplitude: {avg_amplitude:.3f}\n"
+    return summary
+def create_gradio_interface():
+    """
+    Create the Gradio interface for Basic Pitch transcription.
+    """
+    with gr.Blocks(title="Basic Pitch - Audio to MIDI") as demo:
+        gr.Markdown("""
+        # 🎵 Basic Pitch: Automatic Music Transcription
+        Convert audio files to MIDI notation using Spotify's **Basic Pitch** model.
+        This lightweight neural network performs **automatic music transcription (AMT)**
+        and works with any instrument or voice.
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### 📤 Input")
+                audio_input = gr.Audio(
+                    label="Upload Audio File",
+                    type="numpy",
+                    sources=["upload", "microphone"]
+                )
+                gr.Markdown("""
+                **Supported formats:**
+                - `.wav`, `.mp3`, `.ogg`, `.flac`, `.m4a`
+                **Recommended:**
+                - Mono audio (single instrument)
+                - Clear, high-quality recordings
+                - 30 seconds to 5 minutes duration
+                """)
+                transcribe_btn = gr.Button(
+                    "🎼 Transcribe to MIDI",
+                    variant="primary",
+                    size="lg"
+                )
+            with gr.Column(scale=1):
+                gr.Markdown("### 📥 Output")
+                midi_file = gr.File(
+                    label="Download MIDI",
+                    type="filepath"
+                )
+                note_info = gr.Textbox(
+                    label="Note Detection Summary",
+                    lines=15,
+                    interactive=False,
+                    max_lines=20
+                )
+        gr.Markdown("""
+        ---
+        ### ⚙️ Model Details
+        **Model:** ICASSP 2022 (Spotify Basic Pitch)
+        - Lightweight: ~20 MB
+        - CPU-optimized inference
+        - No GPU required
+        **Detection Parameters:**
+        - Onset threshold: 0.5 (note attack sensitivity)
+        - Frame threshold: 0.3 (note sustain sensitivity)
+        - Frequency range: 10 Hz - 2000 Hz
+        - Melodia post-processing: Enabled
+        **Output:**
+        - MIDI file with detected notes
+        - Note timing and pitch information
+        - Amplitude/velocity data
+        """)
+        gr.Markdown("""
+        ---
+        ### 💡 Tips for Best Results
+        1. **Single instrument:** Works best with one instrument or voice
+        2. **Mono audio:** Use mono recordings when possible
+        3. **Clear audio:** Avoid background noise
+        4. **Duration:** Works with any length, but 30s-5min is typical
+        5. **Polyphonic:** Can detect multiple simultaneous notes
+        **Limitations:**
+        - Works best with pitched instruments (not drums)
+        - May struggle with very fast passages
+        - Polyphonic music may need manual correction
+        """)
+        gr.Markdown("""
+        ---
+        ### 📚 About Basic Pitch
+        Developed by [Spotify's Audio Intelligence Lab](https://github.com/spotify/basic-pitch)
+        **Citation:**
+        ```
+        Basic Pitch: A Lightweight Yet Effective Pitch Detection Model
+        for Automatic Music Transcription
+        Spotify, 2022
+        ```
+        """)
+        # Connect button to function
+        transcribe_btn.click(
+            fn=transcribe_audio,
+            inputs=[audio_input],
+            outputs=[midi_file, note_info]
+        )
+    return demo
+if __name__ == "__main__":
+    interface = create_gradio_interface()
+    interface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )

requirements.txt.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+basic-pitch==0.3.13
+gradio==4.18.0
+librosa==0.10.0
+numpy==1.24.3
+pretty-midi==0.2.10
+scipy==1.11.0
+soundfile==0.12.1
+resampy==0.4.2