# NOTE(review): the three lines here ("Spaces:", "Build error" x2) were
# build-log residue from the hosting page, not part of the program; kept
# only as a comment so the module remains valid Python.
"""
Basic Pitch Audio-to-MIDI Converter.

Hugging Face Space for CPU inference.
Written by copy-paste with AI assistance.
"""
import os
import tempfile
from pathlib import Path

import gradio as gr
import numpy as np
from basic_pitch import ICASSP_2022_MODEL_PATH
from basic_pitch.inference import predict
def transcribe_audio(audio_input):
    """
    Transcribe audio to MIDI using the Basic Pitch model.

    Args:
        audio_input: Either a path to an audio file (str) or a tuple
            ``(sample_rate, audio_array)`` as delivered by a Gradio
            Audio component.

    Returns:
        tuple: ``(midi_file_path, note_info)`` — the path to the
        generated MIDI file (``None`` on failure) and a human-readable
        summary of detected notes (or an error message).
    """
    tmp_wav_path = None
    try:
        # Gradio may hand us either a file path or raw (rate, samples) data.
        if isinstance(audio_input, tuple):
            sample_rate, audio_array = audio_input
            import soundfile as sf
            # Reserve a unique path, then write AFTER the handle is closed
            # (writing through a still-open NamedTemporaryFile fails on
            # Windows and is fragile elsewhere).
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp_wav_path = tmp.name
            sf.write(tmp_wav_path, audio_array, sample_rate)
            audio_path = tmp_wav_path
        else:
            audio_path = audio_input

        # Run inference with the pretrained ICASSP 2022 model.
        model_output, midi_data, note_events = predict(
            audio_path,
            model_or_model_path=ICASSP_2022_MODEL_PATH,
            onset_thresh=0.5,
            frame_thresh=0.3,
            # Milliseconds; this is Basic Pitch's documented default
            # minimum note length (~11 frames) — TODO confirm units.
            minimum_note_length=127.70254248031496,
            minimum_frequency=10,
            maximum_frequency=2000,
            melodia_trick=True,
            sonify=False,
        )

        # Write the MIDI to a unique persistent temp file. A unique name
        # (rather than a fixed "/tmp/basic_pitch_output.mid") prevents
        # concurrent requests from clobbering each other's output; the
        # previous extra write into a TemporaryDirectory was discarded
        # the moment the directory context exited, so it is dropped.
        with tempfile.NamedTemporaryFile(
            prefix="basic_pitch_", suffix=".mid", delete=False
        ) as midi_tmp:
            final_midi_path = midi_tmp.name
        midi_data.write(final_midi_path)

        return final_midi_path, format_note_events(note_events)
    except Exception as e:
        # Surface the failure to the UI instead of crashing the worker.
        return None, f"Error during transcription: {str(e)}"
    finally:
        # Remove the temporary WAV created for raw-array input (the
        # original leaked one file per request).
        if tmp_wav_path is not None:
            try:
                os.unlink(tmp_wav_path)
            except OSError:
                pass
def format_note_events(note_events):
    """
    Format note events into a readable, column-aligned text table.

    Args:
        note_events: List of ``(start_time, end_time, pitch_midi,
            amplitude, pitch_bends)`` tuples as returned by Basic Pitch.

    Returns:
        str: A formatted table of notes, or ``"No notes detected."``
        when the list is empty.
    """
    if not note_events:
        return "No notes detected."

    divider = "-" * 60 + "\n"
    # Accumulate pieces in a list and join once — repeated `+=` on a
    # string is quadratic in the number of notes.
    parts = [
        "Detected Notes:\n",
        divider,
        f"{'Start (s)':<12} {'End (s)':<12} {'MIDI Pitch':<12} {'Amplitude':<12}\n",
        divider,
    ]
    for start, end, pitch, amplitude, _ in note_events:
        parts.append(f"{start:<12.3f} {end:<12.3f} {pitch:<12} {amplitude:<12.3f}\n")
    parts.append(divider)
    parts.append(f"Total notes detected: {len(note_events)}")
    return "".join(parts)
def create_interface():
    """Build and return the Gradio Blocks UI for Basic Pitch."""
    with gr.Blocks(title="Basic Pitch - Audio to MIDI") as demo:
        # Header / feature overview.
        gr.Markdown("""
        # 🎵 Basic Pitch: Audio-to-MIDI Transcription
        Convert audio files to MIDI using Spotify's lightweight neural network model.
        **Supported formats:** `.mp3`, `.wav`, `.ogg`, `.flac`, `.m4a`
        **Features:**
        - Automatic music transcription (AMT)
        - Polyphonic note detection
        - Pitch bend estimation
        - Instrument-agnostic (works with vocals, strings, brass, etc.)
        **Note:** Works best with single-instrument audio. Mono audio is recommended.
        """)

        # Two-column layout: input controls on the left, results on the right.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Input")
                audio_in = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"],
                )
                transcribe_button = gr.Button(
                    "🎼 Transcribe to MIDI",
                    variant="primary",
                )
            with gr.Column():
                gr.Markdown("### Output")
                midi_file = gr.File(
                    label="Download MIDI File",
                    type="filepath",
                )
                notes_box = gr.Textbox(
                    label="Detected Notes",
                    lines=10,
                    interactive=False,
                )

        # Static documentation of the (fixed) inference parameters.
        gr.Markdown("""
        ### Parameters
        - **Onset Threshold:** Minimum amplitude for onset detection (0.5)
        - **Frame Threshold:** Minimum amplitude for note frames (0.3)
        - **Melodia Trick:** Post-processing for melody extraction (enabled)
        - **Frequency Range:** 10 Hz - 2000 Hz
        """)
        gr.Markdown("""
        ### Tips
        1. **Best results:** Single instrument, mono audio, clear recordings
        2. **Audio quality:** Higher quality audio produces better transcriptions
        3. **Duration:** Works with any length, but longer files take more time
        4. **Polyphonic:** Detects multiple simultaneous notes
        ### About Basic Pitch
        Developed by Spotify's Audio Intelligence Lab. See the [GitHub repo](https://github.com/spotify/basic-pitch) for more info.
        """)

        # Wire the button to the transcription callback.
        transcribe_button.click(
            fn=transcribe_audio,
            inputs=[audio_in],
            outputs=[midi_file, notes_box],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    create_interface().launch()