# HumanizeBot — app.py
# Convert MP3/audio files to MIDI and apply "humanizing" timing/velocity
# jitter so AI-generated music sounds less mechanical. Gradio web app.
import gradio as gr
import pretty_midi
import numpy as np
import tempfile
import os
import librosa
import torch
import torchaudio
from pathlib import Path
import soundfile as sf
import io
class MP3ToHumanized:
    """Transcribe audio files to MIDI and apply "humanizing" jitter.

    Two-stage pipeline:
      1. ``audio_to_midi`` transcribes an audio file using one of three
         strategies (basic onset+pitch, melody tracking, or beat-based drums).
      2. ``humanize_midi`` perturbs note timing, duration and velocity so the
         result sounds less mechanical.
    """

    def __init__(self):
        # Per-instrument jitter profiles:
        #   timing_var   - std-dev (seconds) of onset/duration jitter
        #   velocity_var - max absolute MIDI-velocity offset
        #   swing_factor - reserved for future styles; not used by humanize_midi yet
        self.groove_profiles = {
            "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
            "melody": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
            "bass": {"timing_var": 0.015, "velocity_var": 12, "swing_factor": 0.07},
            "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
            "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05}
        }

    def audio_to_midi(self, audio_path, conversion_method="basic"):
        """Load *audio_path* and transcribe it to a ``pretty_midi.PrettyMIDI``.

        conversion_method: "basic" (onset + pitch estimate), "melody"
        (pyin pitch tracking); anything else falls through to the
        beat-tracking rhythm converter.

        Raises:
            RuntimeError: on any load/transcription failure (original
                exception chained as the cause).
        """
        try:
            # 22.05 kHz is plenty for pitch work and halves the load time.
            y, sr = librosa.load(audio_path, sr=22050)
            if conversion_method == "basic":
                return self.basic_audio_to_midi(y, sr)
            elif conversion_method == "melody":
                return self.melody_extraction_to_midi(y, sr)
            else:
                return self.rhythm_based_midi(y, sr)
        except Exception as e:
            # Chain the cause so the original traceback is preserved
            # (the old code raised a bare Exception and lost it).
            raise RuntimeError(f"Audio to MIDI conversion failed: {str(e)}") from e

    def basic_audio_to_midi(self, y, sr):
        """Basic transcription: detect onsets, then estimate one pitch per onset.

        Each detected onset becomes a fixed 500 ms piano note with a random
        velocity in [60, 100).
        """
        midi = pretty_midi.PrettyMIDI()
        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        instrument = pretty_midi.Instrument(program=piano_program)
        # Detect note-start times; backtrack=True snaps onsets to the
        # preceding energy minimum for cleaner segment boundaries.
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
        for onset_time in onset_times:
            # Analyze a 500 ms window starting at the onset.
            start_sample = int(onset_time * sr)
            end_sample = start_sample + int(0.5 * sr)
            if end_sample < len(y):
                segment = y[start_sample:end_sample]
                f0 = self.estimate_pitch(segment, sr)
                if f0 > 0:
                    # Standard frequency -> MIDI mapping (A4 = 440 Hz = note 69).
                    midi_note = int(69 + 12 * np.log2(f0 / 440.0))
                    if 0 <= midi_note <= 127:
                        note = pretty_midi.Note(
                            velocity=np.random.randint(60, 100),
                            pitch=midi_note,
                            start=onset_time,
                            end=onset_time + 0.5  # fixed 500 ms duration
                        )
                        instrument.notes.append(note)
        midi.instruments.append(instrument)
        return midi

    def melody_extraction_to_midi(self, y, sr):
        """Track the melody with pyin and emit one MIDI note per pitch run.

        Consecutive frames with the same (rounded) MIDI pitch are merged into
        a single note; unvoiced frames end the current note.
        """
        midi = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(program=0)  # Acoustic Grand Piano
        f0, voiced_flag, voiced_probs = librosa.pyin(
            y,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=sr
        )
        # hop_length=512 matches pyin's default hop (frame_length 2048 // 4).
        times = librosa.times_like(f0, sr=sr, hop_length=512)
        current_note = None
        note_start = 0
        for time, freq, voiced in zip(times, f0, voiced_flag):
            if voiced and not np.isnan(freq):
                midi_note = int(69 + 12 * np.log2(freq / 440.0))
                if 0 <= midi_note <= 127:
                    if current_note != midi_note:
                        if current_note is not None:
                            # Pitch changed: close the previous note here.
                            instrument.notes.append(pretty_midi.Note(
                                velocity=80,
                                pitch=current_note,
                                start=note_start,
                                end=time
                            ))
                        current_note = midi_note
                        note_start = time
            else:
                if current_note is not None:
                    # Voicing ended: close the current note.
                    instrument.notes.append(pretty_midi.Note(
                        velocity=80,
                        pitch=current_note,
                        start=note_start,
                        end=time
                    ))
                    current_note = None
        # Bug fix: a note still sounding when the audio ends was silently
        # dropped by the original loop — close it at the final frame time.
        if current_note is not None and len(times) > 0:
            instrument.notes.append(pretty_midi.Note(
                velocity=80,
                pitch=current_note,
                start=note_start,
                end=float(times[-1])
            ))
        midi.instruments.append(instrument)
        return midi

    def rhythm_based_midi(self, y, sr):
        """Create a drum-track MIDI with a kick hit on every tracked beat."""
        midi = pretty_midi.PrettyMIDI()
        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
        # Only the beat positions are needed; the tempo estimate is unused.
        _tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)
        for beat_time in beat_times:
            drum_instrument.notes.append(pretty_midi.Note(
                velocity=100,
                pitch=36,  # GM percussion: Bass Drum 1 (kick)
                start=beat_time,
                end=beat_time + 0.1
            ))
        midi.instruments.append(drum_instrument)
        return midi

    def estimate_pitch(self, segment, sr):
        """Estimate the fundamental frequency (Hz) of *segment* by
        autocorrelation; return 0 when no pitch can be determined."""
        if len(segment) == 0:
            return 0
        try:
            corr = np.correlate(segment, segment, mode='full')
            corr = corr[len(corr) // 2:]  # keep non-negative lags only
            # Skip the initial decline from the zero-lag maximum; the highest
            # remaining peak marks the fundamental period in samples.
            d = np.diff(corr)
            rising = np.where(d > 0)[0]
            if len(rising) > 0:
                start = rising[0]
                peak = np.argmax(corr[start:]) + start
                return sr / peak if peak > 0 else 0
        except Exception:
            # Best-effort estimator: any numerical failure means "no pitch".
            # (Narrowed from the original bare `except:`, which would also
            # have swallowed KeyboardInterrupt/SystemExit.)
            pass
        return 0

    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
        """Jitter note timing, duration and velocity in place.

        Args:
            midi_data: ``pretty_midi.PrettyMIDI`` to modify.
            intensity: scales all perturbations (0..1).
            style: accepted for API compatibility; not used yet.

        Returns:
            The same (mutated) ``PrettyMIDI`` object.
        """
        # Note: the original called midi_data.estimate_tempo() here, ignored
        # the result, and could raise ValueError on sparse MIDI — removed.
        for instrument in midi_data.instruments:
            inst_type = "drums" if instrument.is_drum else "melody"
            profile = self.groove_profiles[inst_type]
            for note in instrument.notes:
                # Timing: Gaussian jitter, clamped so notes never start before 0.
                timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
                note.start = max(0, note.start + timing_shift)
                # Duration (pitched instruments only): keep at least 100 ms.
                if not instrument.is_drum:
                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
                    note.end = max(note.start + 0.1, note.end + duration_shift)
                # Velocity: symmetric integer jitter clamped to [20, 127].
                # (+1 because np.random.randint excludes the upper bound;
                # the original range was asymmetric.)
                vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"] + 1)
                new_velocity = note.velocity + int(vel_shift * intensity)
                note.velocity = max(20, min(127, new_velocity))
        return midi_data
def process_audio_files(files, intensity, style, conversion_method):
    """Gradio callback: convert uploaded audio files to humanized MIDI.

    Args:
        files: list of uploaded files — plain path strings (recent Gradio
            with ``type="filepath"``) or wrapper objects with ``.name``.
        intensity: humanization strength (0.1..1.0 slider).
        style: humanization style radio value (passed through).
        conversion_method: "basic" | "melody" | "rhythm".

    Returns:
        (midi_paths or None, preview_wav_path or None, status message).
    """
    if not files:
        return None, None, "Please upload audio files (MP3, WAV, etc.)"
    converter = MP3ToHumanized()
    processed_files = []
    for file in files:
        # Bug fix: gr.File(type="filepath") yields plain strings in recent
        # Gradio; older versions yield tempfile wrappers with `.name`.
        # The original unconditionally used file.name and crashed on strings.
        path = file if isinstance(file, str) else file.name
        try:
            midi_data = converter.audio_to_midi(path, conversion_method)
            humanized_midi = converter.humanize_midi(midi_data, intensity, style)
            # NamedTemporaryFile(delete=False) replaces the deprecated,
            # race-prone tempfile.mktemp().
            with tempfile.NamedTemporaryFile(suffix='_humanized.mid', delete=False) as tmp:
                output_path = tmp.name
            humanized_midi.write(output_path)
            processed_files.append(output_path)
        except Exception as e:
            return None, None, f"Error processing {path}: {str(e)}"
    if not processed_files:
        return None, None, "❌ No files were processed successfully."
    # Render an audio preview of the first result; this is best-effort —
    # the MIDI downloads still succeed if synthesis fails.
    preview_audio = None
    try:
        midi_data = pretty_midi.PrettyMIDI(processed_files[0])
        audio_data = midi_data.synthesize()  # default fs=44100, matches sf.write below
        with tempfile.NamedTemporaryFile(suffix='_preview.wav', delete=False) as tmp:
            preview_path = tmp.name
        sf.write(preview_path, audio_data, 44100)
        preview_audio = preview_path
    except Exception:
        preview_audio = None
    return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
# Create Gradio interface.
# Layout: two columns — inputs/controls on the left, results on the right —
# plus a collapsible help accordion. Statement order defines the layout.
with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
    gr.Markdown("""
    # 🎵 MP3 HumanizeBot
    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
    """)
    with gr.Row():
        # Left column: upload + conversion/humanization controls.
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Upload Audio Files")
            file_input = gr.File(
                file_count="multiple",
                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
                label="Upload Audio Files",
                type="filepath"
            )
            # Selects which MP3ToHumanized conversion strategy is used.
            conversion_method = gr.Radio(
                ["basic", "melody", "rhythm"],
                value="basic",
                label="🎵 Conversion Method",
                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
            )
            # Scales timing/velocity jitter in humanize_midi.
            intensity = gr.Slider(
                0.1, 1.0,
                value=0.7,
                label="🎚️ Humanization Intensity"
            )
            # NOTE(review): style is passed through to humanize_midi but the
            # current implementation does not branch on it.
            style = gr.Radio(
                ["organic", "groovy", "gentle"],
                value="organic",
                label="🎸 Humanization Style"
            )
            process_btn = gr.Button(
                "✨ Convert & Humanize!",
                variant="primary",
                size="lg"
            )
        # Right column: downloadable MIDI, synthesized preview, status text.
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Download Results")
            file_output = gr.File(
                file_count="multiple",
                label="Download Humanized MIDI Files"
            )
            audio_output = gr.Audio(
                label="MIDI Audio Preview",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=4
            )
    with gr.Accordion("ℹ️ How It Works", open=False):
        gr.Markdown("""
        **Process:**
        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
        2. **Convert** to MIDI using pitch and rhythm detection
        3. **Humanize** with timing and velocity variations
        4. **Download** humanized MIDI files
        **Conversion Methods:**
        - **Basic**: General purpose conversion for most music
        - **Melody**: Focuses on extracting melodic content
        - **Rhythm**: Focuses on drum patterns and beats
        **Note**: Audio-to-MIDI conversion is challenging and works best with:
        - Clear melodic lines
        - Good audio quality
        - Not too much reverb/effects
        """)
    # Wire the button to the processing callback.
    process_btn.click(
        fn=process_audio_files,
        inputs=[file_input, intensity, style, conversion_method],
        outputs=[file_output, audio_output, status]
    )

if __name__ == "__main__":
    # debug=True enables Gradio's verbose error reporting in the UI.
    demo.launch(debug=True)