# HumanizeBot — app.py
# Convert MP3/audio files to MIDI and apply "humanizing" timing/velocity
# jitter so AI-generated music sounds less mechanical. Gradio web app.
import gradio as gr
import pretty_midi
import numpy as np
import tempfile
import os
import librosa
import torch
import torchaudio
from pathlib import Path
import soundfile as sf
import io
class MP3ToHumanized:
    """Transcribe audio files to MIDI and apply "humanizing" jitter.

    Two-stage pipeline:
      1. ``audio_to_midi`` transcribes an audio file using one of three
         strategies (basic onset+pitch, melody tracking, or beat-based drums).
      2. ``humanize_midi`` perturbs note timing, duration and velocity so the
         result sounds less mechanical.
    """

    def __init__(self):
        # Per-instrument jitter profiles:
        #   timing_var   - std-dev (seconds) of onset/duration jitter
        #   velocity_var - max absolute MIDI-velocity offset
        #   swing_factor - reserved for future styles; not used by humanize_midi yet
        self.groove_profiles = {
            "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
            "melody": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
            "bass": {"timing_var": 0.015, "velocity_var": 12, "swing_factor": 0.07},
            "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
            "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05}
        }

    def audio_to_midi(self, audio_path, conversion_method="basic"):
        """Load *audio_path* and transcribe it to a ``pretty_midi.PrettyMIDI``.

        conversion_method: "basic" (onset + pitch estimate), "melody"
        (pyin pitch tracking); anything else falls through to the
        beat-tracking rhythm converter.

        Raises:
            RuntimeError: on any load/transcription failure (original
                exception chained as the cause).
        """
        try:
            # 22.05 kHz is plenty for pitch work and halves the load time.
            y, sr = librosa.load(audio_path, sr=22050)
            if conversion_method == "basic":
                return self.basic_audio_to_midi(y, sr)
            elif conversion_method == "melody":
                return self.melody_extraction_to_midi(y, sr)
            else:
                return self.rhythm_based_midi(y, sr)
        except Exception as e:
            # Chain the cause so the original traceback is preserved
            # (the old code raised a bare Exception and lost it).
            raise RuntimeError(f"Audio to MIDI conversion failed: {str(e)}") from e

    def basic_audio_to_midi(self, y, sr):
        """Basic transcription: detect onsets, then estimate one pitch per onset.

        Each detected onset becomes a fixed 500 ms piano note with a random
        velocity in [60, 100).
        """
        midi = pretty_midi.PrettyMIDI()
        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        instrument = pretty_midi.Instrument(program=piano_program)
        # Detect note-start times; backtrack=True snaps onsets to the
        # preceding energy minimum for cleaner segment boundaries.
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
        for onset_time in onset_times:
            # Analyze a 500 ms window starting at the onset.
            start_sample = int(onset_time * sr)
            end_sample = start_sample + int(0.5 * sr)
            if end_sample < len(y):
                segment = y[start_sample:end_sample]
                f0 = self.estimate_pitch(segment, sr)
                if f0 > 0:
                    # Standard frequency -> MIDI mapping (A4 = 440 Hz = note 69).
                    midi_note = int(69 + 12 * np.log2(f0 / 440.0))
                    if 0 <= midi_note <= 127:
                        note = pretty_midi.Note(
                            velocity=np.random.randint(60, 100),
                            pitch=midi_note,
                            start=onset_time,
                            end=onset_time + 0.5  # fixed 500 ms duration
                        )
                        instrument.notes.append(note)
        midi.instruments.append(instrument)
        return midi

    def melody_extraction_to_midi(self, y, sr):
        """Track the melody with pyin and emit one MIDI note per pitch run.

        Consecutive frames with the same (rounded) MIDI pitch are merged into
        a single note; unvoiced frames end the current note.
        """
        midi = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(program=0)  # Acoustic Grand Piano
        f0, voiced_flag, voiced_probs = librosa.pyin(
            y,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=sr
        )
        # hop_length=512 matches pyin's default hop (frame_length 2048 // 4).
        times = librosa.times_like(f0, sr=sr, hop_length=512)
        current_note = None
        note_start = 0
        for time, freq, voiced in zip(times, f0, voiced_flag):
            if voiced and not np.isnan(freq):
                midi_note = int(69 + 12 * np.log2(freq / 440.0))
                if 0 <= midi_note <= 127:
                    if current_note != midi_note:
                        if current_note is not None:
                            # Pitch changed: close the previous note here.
                            instrument.notes.append(pretty_midi.Note(
                                velocity=80,
                                pitch=current_note,
                                start=note_start,
                                end=time
                            ))
                        current_note = midi_note
                        note_start = time
            else:
                if current_note is not None:
                    # Voicing ended: close the current note.
                    instrument.notes.append(pretty_midi.Note(
                        velocity=80,
                        pitch=current_note,
                        start=note_start,
                        end=time
                    ))
                    current_note = None
        # Bug fix: a note still sounding when the audio ends was silently
        # dropped by the original loop — close it at the final frame time.
        if current_note is not None and len(times) > 0:
            instrument.notes.append(pretty_midi.Note(
                velocity=80,
                pitch=current_note,
                start=note_start,
                end=float(times[-1])
            ))
        midi.instruments.append(instrument)
        return midi

    def rhythm_based_midi(self, y, sr):
        """Create a drum-track MIDI with a kick hit on every tracked beat."""
        midi = pretty_midi.PrettyMIDI()
        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
        # Only the beat positions are needed; the tempo estimate is unused.
        _tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)
        for beat_time in beat_times:
            drum_instrument.notes.append(pretty_midi.Note(
                velocity=100,
                pitch=36,  # GM percussion: Bass Drum 1 (kick)
                start=beat_time,
                end=beat_time + 0.1
            ))
        midi.instruments.append(drum_instrument)
        return midi

    def estimate_pitch(self, segment, sr):
        """Estimate the fundamental frequency (Hz) of *segment* by
        autocorrelation; return 0 when no pitch can be determined."""
        if len(segment) == 0:
            return 0
        try:
            corr = np.correlate(segment, segment, mode='full')
            corr = corr[len(corr) // 2:]  # keep non-negative lags only
            # Skip the initial decline from the zero-lag maximum; the highest
            # remaining peak marks the fundamental period in samples.
            d = np.diff(corr)
            rising = np.where(d > 0)[0]
            if len(rising) > 0:
                start = rising[0]
                peak = np.argmax(corr[start:]) + start
                return sr / peak if peak > 0 else 0
        except Exception:
            # Best-effort estimator: any numerical failure means "no pitch".
            # (Narrowed from the original bare `except:`, which would also
            # have swallowed KeyboardInterrupt/SystemExit.)
            pass
        return 0

    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
        """Jitter note timing, duration and velocity in place.

        Args:
            midi_data: ``pretty_midi.PrettyMIDI`` to modify.
            intensity: scales all perturbations (0..1).
            style: accepted for API compatibility; not used yet.

        Returns:
            The same (mutated) ``PrettyMIDI`` object.
        """
        # Note: the original called midi_data.estimate_tempo() here, ignored
        # the result, and could raise ValueError on sparse MIDI — removed.
        for instrument in midi_data.instruments:
            inst_type = "drums" if instrument.is_drum else "melody"
            profile = self.groove_profiles[inst_type]
            for note in instrument.notes:
                # Timing: Gaussian jitter, clamped so notes never start before 0.
                timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
                note.start = max(0, note.start + timing_shift)
                # Duration (pitched instruments only): keep at least 100 ms.
                if not instrument.is_drum:
                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
                    note.end = max(note.start + 0.1, note.end + duration_shift)
                # Velocity: symmetric integer jitter clamped to [20, 127].
                # (+1 because np.random.randint excludes the upper bound;
                # the original range was asymmetric.)
                vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"] + 1)
                new_velocity = note.velocity + int(vel_shift * intensity)
                note.velocity = max(20, min(127, new_velocity))
        return midi_data
def process_audio_files(files, intensity, style, conversion_method):
    """Gradio callback: convert uploaded audio files to humanized MIDI.

    Args:
        files: list of uploaded files — plain path strings (recent Gradio
            with ``type="filepath"``) or wrapper objects with ``.name``.
        intensity: humanization strength (0.1..1.0 slider).
        style: humanization style radio value (passed through).
        conversion_method: "basic" | "melody" | "rhythm".

    Returns:
        (midi_paths or None, preview_wav_path or None, status message).
    """
    if not files:
        return None, None, "Please upload audio files (MP3, WAV, etc.)"
    converter = MP3ToHumanized()
    processed_files = []
    for file in files:
        # Bug fix: gr.File(type="filepath") yields plain strings in recent
        # Gradio; older versions yield tempfile wrappers with `.name`.
        # The original unconditionally used file.name and crashed on strings.
        path = file if isinstance(file, str) else file.name
        try:
            midi_data = converter.audio_to_midi(path, conversion_method)
            humanized_midi = converter.humanize_midi(midi_data, intensity, style)
            # NamedTemporaryFile(delete=False) replaces the deprecated,
            # race-prone tempfile.mktemp().
            with tempfile.NamedTemporaryFile(suffix='_humanized.mid', delete=False) as tmp:
                output_path = tmp.name
            humanized_midi.write(output_path)
            processed_files.append(output_path)
        except Exception as e:
            return None, None, f"Error processing {path}: {str(e)}"
    if not processed_files:
        return None, None, "❌ No files were processed successfully."
    # Render an audio preview of the first result; this is best-effort —
    # the MIDI downloads still succeed if synthesis fails.
    preview_audio = None
    try:
        midi_data = pretty_midi.PrettyMIDI(processed_files[0])
        audio_data = midi_data.synthesize()  # default fs=44100, matches sf.write below
        with tempfile.NamedTemporaryFile(suffix='_preview.wav', delete=False) as tmp:
            preview_path = tmp.name
        sf.write(preview_path, audio_data, 44100)
        preview_audio = preview_path
    except Exception:
        preview_audio = None
    return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
# Create Gradio interface.
# Layout: two columns — inputs/controls on the left, results on the right —
# plus a collapsible help accordion. Statement order defines the layout.
with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
    gr.Markdown("""
    # 🎵 MP3 HumanizeBot
    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
    """)
    with gr.Row():
        # Left column: upload + conversion/humanization controls.
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Upload Audio Files")
            file_input = gr.File(
                file_count="multiple",
                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
                label="Upload Audio Files",
                type="filepath"
            )
            # Selects which MP3ToHumanized conversion strategy is used.
            conversion_method = gr.Radio(
                ["basic", "melody", "rhythm"],
                value="basic",
                label="🎵 Conversion Method",
                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
            )
            # Scales timing/velocity jitter in humanize_midi.
            intensity = gr.Slider(
                0.1, 1.0,
                value=0.7,
                label="🎚️ Humanization Intensity"
            )
            # NOTE(review): style is passed through to humanize_midi but the
            # current implementation does not branch on it.
            style = gr.Radio(
                ["organic", "groovy", "gentle"],
                value="organic",
                label="🎸 Humanization Style"
            )
            process_btn = gr.Button(
                "✨ Convert & Humanize!",
                variant="primary",
                size="lg"
            )
        # Right column: downloadable MIDI, synthesized preview, status text.
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Download Results")
            file_output = gr.File(
                file_count="multiple",
                label="Download Humanized MIDI Files"
            )
            audio_output = gr.Audio(
                label="MIDI Audio Preview",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=4
            )
    with gr.Accordion("ℹ️ How It Works", open=False):
        gr.Markdown("""
        **Process:**
        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
        2. **Convert** to MIDI using pitch and rhythm detection
        3. **Humanize** with timing and velocity variations
        4. **Download** humanized MIDI files
        **Conversion Methods:**
        - **Basic**: General purpose conversion for most music
        - **Melody**: Focuses on extracting melodic content
        - **Rhythm**: Focuses on drum patterns and beats
        **Note**: Audio-to-MIDI conversion is challenging and works best with:
        - Clear melodic lines
        - Good audio quality
        - Not too much reverb/effects
        """)
    # Wire the button to the processing callback.
    process_btn.click(
        fn=process_audio_files,
        inputs=[file_input, intensity, style, conversion_method],
        outputs=[file_output, audio_output, status]
    )

if __name__ == "__main__":
    # debug=True enables Gradio's verbose error reporting in the UI.
    demo.launch(debug=True)