Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pretty_midi | |
| import numpy as np | |
| import tempfile | |
| import os | |
| import librosa | |
| import torch | |
| import torchaudio | |
| from pathlib import Path | |
| import soundfile as sf | |
| import io | |
class MP3ToHumanized:
    """Convert audio (MP3/WAV/...) to MIDI and apply "humanizing" jitter.

    Conversion is heuristic: onset+pitch detection ("basic"), pyin melody
    tracking ("melody"), or beat-tracked drum hits (anything else).
    humanize_midi then perturbs note timing, duration and velocity in place.
    """

    def __init__(self):
        # Per-instrument-type humanization magnitudes:
        #   timing_var   - std dev (seconds) of random onset/duration shifts
        #   velocity_var - max absolute velocity jitter (MIDI velocity units)
        #   swing_factor - reserved for future swing handling (currently unused)
        self.groove_profiles = {
            "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
            "melody": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
            "bass": {"timing_var": 0.015, "velocity_var": 12, "swing_factor": 0.07},
            "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
            "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
        }

    def audio_to_midi(self, audio_path, conversion_method="basic"):
        """Load audio_path and convert it to a pretty_midi.PrettyMIDI.

        conversion_method: "basic" = onset + pitch estimation,
        "melody" = pyin melody tracking, anything else = rhythm/beat tracking.
        Raises Exception (with the original cause chained) on failure.
        """
        try:
            # Fixed sample rate keeps hop-length/time conversions consistent.
            y, sr = librosa.load(audio_path, sr=22050)
            if conversion_method == "basic":
                return self.basic_audio_to_midi(y, sr)
            elif conversion_method == "melody":
                return self.melody_extraction_to_midi(y, sr)
            else:
                return self.rhythm_based_midi(y, sr)
        except Exception as e:
            # Chain the original exception so the real cause is not lost.
            raise Exception(f"Audio to MIDI conversion failed: {str(e)}") from e

    def basic_audio_to_midi(self, y, sr):
        """Detect note onsets and estimate one pitch per onset (piano track)."""
        midi = pretty_midi.PrettyMIDI()
        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        instrument = pretty_midi.Instrument(program=piano_program)

        # Onset times in seconds (backtracked toward the preceding energy minimum).
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)

        for onset_time in onset_times:
            # Analyze a 500 ms window starting at the onset.
            start_sample = int(onset_time * sr)
            end_sample = start_sample + int(0.5 * sr)
            if end_sample < len(y):
                segment = y[start_sample:end_sample]
                f0 = self.estimate_pitch(segment, sr)
                if f0 > 0:
                    # Hz -> MIDI note. Round to the nearest semitone; plain
                    # int() truncation biased every detected note flat.
                    midi_note = int(round(69 + 12 * np.log2(f0 / 440.0)))
                    if 0 <= midi_note <= 127:
                        note = pretty_midi.Note(
                            velocity=np.random.randint(60, 100),
                            pitch=midi_note,
                            start=onset_time,
                            end=onset_time + 0.5,  # fixed 500 ms duration
                        )
                        instrument.notes.append(note)
        midi.instruments.append(instrument)
        return midi

    def melody_extraction_to_midi(self, y, sr):
        """Track the melody with pyin and segment it into sustained MIDI notes."""
        midi = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(program=0)  # Acoustic Grand Piano

        f0, voiced_flag, voiced_probs = librosa.pyin(
            y,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=sr
        )
        # pyin's default hop at its default frame length is 512, matching this.
        times = librosa.times_like(f0, sr=sr, hop_length=512)

        current_note = None
        note_start = 0
        last_time = 0
        for time, freq, voiced in zip(times, f0, voiced_flag):
            last_time = time
            if voiced and not np.isnan(freq):
                # Round to the nearest semitone (int() alone truncated flat).
                midi_note = int(round(69 + 12 * np.log2(freq / 440.0)))
                if 0 <= midi_note <= 127:
                    if current_note != midi_note:
                        if current_note is not None:
                            # Pitch changed: close the previous note.
                            instrument.notes.append(pretty_midi.Note(
                                velocity=80,
                                pitch=current_note,
                                start=note_start,
                                end=time,
                            ))
                        # Start tracking the new pitch.
                        current_note = midi_note
                        note_start = time
            else:
                if current_note is not None:
                    # Voicing ended: close the current note.
                    instrument.notes.append(pretty_midi.Note(
                        velocity=80,
                        pitch=current_note,
                        start=note_start,
                        end=time,
                    ))
                    current_note = None
        # Bug fix: if the audio ends while still voiced, the last note was
        # silently dropped; close it at the final frame time instead.
        if current_note is not None:
            instrument.notes.append(pretty_midi.Note(
                velocity=80,
                pitch=current_note,
                start=note_start,
                end=last_time,
            ))
        midi.instruments.append(instrument)
        return midi

    def rhythm_based_midi(self, y, sr):
        """Build a kick-drum pattern from the tracked beat positions."""
        midi = pretty_midi.PrettyMIDI()
        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)

        # beat_track returns (tempo, beat frames); only the frames are used.
        _tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)

        for beat_time in beat_times:
            note = pretty_midi.Note(
                velocity=100,
                pitch=36,  # General MIDI acoustic bass (kick) drum
                start=beat_time,
                end=beat_time + 0.1,
            )
            drum_instrument.notes.append(note)
        midi.instruments.append(drum_instrument)
        return midi

    def estimate_pitch(self, segment, sr):
        """Estimate the fundamental frequency (Hz) of *segment* via autocorrelation.

        Returns 0 when no pitch can be determined.
        """
        try:
            corr = np.correlate(segment, segment, mode='full')
            corr = corr[len(corr) // 2:]  # keep non-negative lags only
            # Skip the zero-lag peak: find where the autocorrelation first
            # starts rising again, then take the maximum after that point —
            # its lag corresponds to the fundamental period.
            d = np.diff(corr)
            rising = np.where(d > 0)[0]
            if len(rising) > 0:
                start = rising[0]
                peak = np.argmax(corr[start:]) + start
                return sr / peak if peak > 0 else 0
        except Exception:
            # Degenerate segments (empty, constant) fall through to 0;
            # intentionally best-effort rather than crashing a whole file.
            pass
        return 0

    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
        """Jitter note timing, duration and velocity in place; returns midi_data.

        intensity scales all perturbations (0..1); style is accepted for API
        compatibility but currently unused.
        """
        # NOTE: the original code called midi_data.estimate_tempo() into an
        # unused variable; pretty_midi raises ValueError on sparse MIDI, so
        # that dead call was a latent crash and has been removed.
        for instrument in midi_data.instruments:
            inst_type = "drums" if instrument.is_drum else "melody"
            profile = self.groove_profiles[inst_type]
            for note in instrument.notes:
                # Timing: Gaussian jitter, clamped so notes never start < 0.
                timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
                note.start = max(0, note.start + timing_shift)
                # Duration: pitched instruments only; keep at least 100 ms.
                if not instrument.is_drum:
                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
                    note.end = max(note.start + 0.1, note.end + duration_shift)
                # Velocity: symmetric jitter. randint's upper bound is
                # exclusive, so the original (-v, v) biased velocities
                # downward; (-v, v+1) makes the jitter symmetric.
                vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"] + 1)
                new_velocity = note.velocity + int(vel_shift * intensity)
                note.velocity = max(20, min(127, new_velocity))
        return midi_data
def process_audio_files(files, intensity, style, conversion_method):
    """Gradio callback: convert uploaded audio files to humanized MIDI.

    Returns a (midi_file_paths, preview_audio_path, status_message) tuple;
    the first two elements are None when nothing was processed.
    """
    if not files:
        return None, None, "Please upload audio files (MP3, WAV, etc.)"

    converter = MP3ToHumanized()
    processed_files = []
    for file in files:
        # gr.File(type="filepath") yields plain path strings on current
        # Gradio; older versions passed tempfile wrappers with a .name
        # attribute. Accept both so the callback doesn't crash either way.
        path = file if isinstance(file, str) else file.name
        try:
            # Convert audio to MIDI, then humanize it.
            midi_data = converter.audio_to_midi(path, conversion_method)
            humanized_midi = converter.humanize_midi(midi_data, intensity, style)
            # mkstemp instead of the deprecated, race-prone tempfile.mktemp;
            # close the fd since pretty_midi writes by path.
            fd, output_path = tempfile.mkstemp(suffix='_humanized.mid')
            os.close(fd)
            humanized_midi.write(output_path)
            processed_files.append(output_path)
        except Exception as e:
            return None, None, f"Error processing {path}: {str(e)}"

    if not processed_files:
        return None, None, "❌ No files were processed successfully."

    # Best-effort audio preview rendered from the first humanized MIDI.
    preview_audio = None
    try:
        midi_data = pretty_midi.PrettyMIDI(processed_files[0])
        audio_data = midi_data.synthesize()  # default fs is 44100 Hz
        fd, preview_path = tempfile.mkstemp(suffix='_preview.wav')
        os.close(fd)
        sf.write(preview_path, audio_data, 44100)
        preview_audio = preview_path
    except Exception:
        # Preview failure is non-fatal; MIDI downloads still succeed.
        preview_audio = None
    return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
# ---------------------------------------------------------------------------
# Create Gradio interface.
# Statement order defines the on-screen layout: inputs in the left column,
# outputs in the right, a help accordion below, and the button wiring last.
# NOTE(review): file_input uses type="filepath", so on current Gradio the
# callback receives plain path strings, while process_audio_files reads
# file.name — confirm these two agree for the installed Gradio version.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
    # App header / description banner.
    gr.Markdown("""
    # 🎵 MP3 HumanizeBot
    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
    """)
    with gr.Row():
        with gr.Column(scale=1):
            # --- Input side: file upload and processing options ------------
            gr.Markdown("### 📁 Upload Audio Files")
            file_input = gr.File(
                file_count="multiple",
                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
                label="Upload Audio Files",
                type="filepath"
            )
            # Selects which MP3ToHumanized conversion routine runs.
            conversion_method = gr.Radio(
                ["basic", "melody", "rhythm"],
                value="basic",
                label="🎵 Conversion Method",
                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
            )
            # Scales the timing/velocity jitter in humanize_midi (0.1-1.0).
            intensity = gr.Slider(
                0.1, 1.0,
                value=0.7,
                label="🎚️ Humanization Intensity"
            )
            # Passed through to humanize_midi (which currently ignores it).
            style = gr.Radio(
                ["organic", "groovy", "gentle"],
                value="organic",
                label="🎸 Humanization Style"
            )
            process_btn = gr.Button(
                "✨ Convert & Humanize!",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            # --- Output side: downloads, audio preview, status -------------
            gr.Markdown("### 📥 Download Results")
            file_output = gr.File(
                file_count="multiple",
                label="Download Humanized MIDI Files"
            )
            audio_output = gr.Audio(
                label="MIDI Audio Preview",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=4
            )
    # Collapsible help text.
    with gr.Accordion("ℹ️ How It Works", open=False):
        gr.Markdown("""
        **Process:**
        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
        2. **Convert** to MIDI using pitch and rhythm detection
        3. **Humanize** with timing and velocity variations
        4. **Download** humanized MIDI files
        **Conversion Methods:**
        - **Basic**: General purpose conversion for most music
        - **Melody**: Focuses on extracting melodic content
        - **Rhythm**: Focuses on drum patterns and beats
        **Note**: Audio-to-MIDI conversion is challenging and works best with:
        - Clear melodic lines
        - Good audio quality
        - Not too much reverb/effects
        """)
    # Wire the button to the processing callback; outputs map 1:1 to the
    # components in the right-hand column.
    process_btn.click(
        fn=process_audio_files,
        inputs=[file_input, intensity, style, conversion_method],
        outputs=[file_output, audio_output, status]
    )

if __name__ == "__main__":
    # debug=True surfaces tracebacks in the server logs (useful on Spaces).
    demo.launch(debug=True)