# AI Song Humanizer — Gradio app (Hugging Face Space)
| import gradio as gr | |
| import numpy as np | |
| import tempfile | |
| import librosa | |
| import soundfile as sf | |
| from scipy import signal | |
| import os | |
class AIHumanizer:
    """Post-processing chain that masks telltale AI-audio artifacts.

    Each stage applies one subtle, mostly-blended transformation
    (spectral notch, micro-timing, micro-pitch, room reverb, saturation,
    amplitude modulation); ``process_channel`` runs them all in sequence
    on a single channel.  Several stages draw from ``np.random``, so the
    output is nondeterministic unless the caller seeds the global RNG.
    """

    def __init__(self):
        # Stateless for now; kept so callers can instantiate normally.
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and humanize every channel.

        Args:
            audio_path: Path to an audio file readable by librosa.
            intensity: Overall effect strength, roughly in [0, 1].

        Returns:
            Tuple ``(y_processed, sr)`` where ``y_processed`` is a 2-D
            array shaped (channels, samples) and ``sr`` is the native
            sample rate of the file.

        Raises:
            Exception: wraps any loading/processing failure with context.
        """
        try:
            print(f"Loading audio from: {audio_path}")
            # Keep the native sample rate; stereo files load as (2, samples).
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            # BUG FIX: duration must come from the sample axis (y.shape[-1]).
            # For stereo input len(y) is the channel count (2), so the old
            # f-string printed a duration of ~0.00s for every stereo file.
            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={y.shape[-1]/sr:.2f}s")
            if len(y.shape) > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i, channel in enumerate(y):
                    print(f"Processing channel {i+1}...")
                    processed_channels.append(self.process_channel(channel, sr, intensity))
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = self.process_channel(y, sr, intensity)
                y_processed = np.array([y_processed])  # 2-D (1, samples) for consistency
            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            raise Exception(f"Humanization failed: {str(e)}")

    def process_channel(self, y, sr, intensity):
        """Run the full artifact-removal/humanization chain on one channel.

        Args:
            y: 1-D float array of samples.
            sr: Sample rate in Hz.
            intensity: Effect strength, roughly in [0, 1].

        Returns:
            Processed 1-D array of the same length as ``y``.
        """
        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
        # 1. Reduce robotic frequencies
        y_processed = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y_processed = self.add_timing_variations(y_processed, sr, intensity)
        # 3. Add pitch variations
        y_processed = self.add_pitch_variations(y_processed, sr, intensity)
        # 4. Add room ambiance
        y_processed = self.add_room_ambiance(y_processed, sr, intensity)
        # 5. Add analog warmth
        y_processed = self.add_analog_warmth(y_processed, sr, intensity)
        # 6. Reduce perfect quantization
        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)
        return y_processed

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Notch out harsh 1.9-6.1 kHz content common in AI renders.

        The band-stopped signal is blended at up to 30% (scaled by
        ``intensity``) so the timbre is softened rather than gutted.
        Returns ``y`` unchanged when the sample rate is too low to filter.
        """
        # BUG FIX: signal.butter requires both cutoffs strictly below the
        # Nyquist frequency (sr/2).  The old guard (sr > 4000) crashed with
        # ValueError for any sample rate in (4000, 12200]; require
        # sr > 2 * 6100 so the 6100 Hz upper edge is always valid.
        if sr > 2 * 6100:
            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
            y_filtered = signal.sosfilt(sos, y)
            # Blend with original based on intensity
            mix = intensity * 0.3
            return y * (1 - mix) + y_filtered * mix
        return y

    def add_timing_variations(self, y, sr, intensity):
        """Add subtle per-segment speed wobble (~±0.4% max).

        Works on 2-second segments: each is resampled by a small random
        factor, then trimmed/zero-padded back to its original length so
        the total duration is preserved.  No-op below intensity 0.1.
        """
        if intensity < 0.1:
            return y
        segment_size = int(sr * 2.0)  # 2-second segments
        segments = []
        for i in range(0, len(y), segment_size):
            segment = y[i:i + segment_size]
            if len(segment) > 100:  # skip fragments too short to warp
                # Small random speed variation around 1.0
                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
                new_length = int(len(segment) / speed_var)
                if new_length > 0 and len(segment) > 0:
                    # Linear-interp resampling for the timing variation
                    original_indices = np.arange(len(segment))
                    new_indices = np.linspace(0, len(segment) - 1, new_length)
                    segment_varied = np.interp(new_indices, original_indices, segment)
                    # Restore the original segment length exactly
                    if len(segment_varied) != len(segment):
                        if len(segment_varied) > len(segment):
                            segment_varied = segment_varied[:len(segment)]
                        else:
                            segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))
                    segments.append(segment_varied)
                else:
                    segments.append(segment)
            else:
                segments.append(segment)
        if segments:
            return np.concatenate(segments)
        return y

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a slightly pitch-shifted copy (random, < ~0.1 semitone).

        No-op below intensity 0.2; returns ``y`` unchanged if the shift
        fails for any reason (best-effort stage).
        """
        if intensity < 0.2:
            return y
        try:
            # Random shift in fractional semitones, centred on 0
            n_steps = np.random.normal(0, 0.1 * intensity)
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps, bins_per_octave=24)
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # catch Exception so the best-effort fallback stays interruptible.
        except Exception:
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Convolve with a tiny synthetic room impulse and blend it in.

        The impulse has one early reflection at 10 ms and an exponentially
        decaying 200 ms tail; the reverberant copy is blended at up to 8%.
        No-op below intensity 0.1 or when the impulse would be degenerate.
        """
        if intensity < 0.1:
            return y
        impulse_length = int(0.2 * sr)  # 200 ms reverb tail
        if impulse_length < 10:
            return y
        impulse = np.zeros(impulse_length)
        # Single early reflection at ~10 ms
        early_reflections = int(0.01 * sr)
        if early_reflections < len(impulse):
            impulse[early_reflections] = 0.6
        # Exponentially decaying late-reverb tail
        reverb_start = min(early_reflections + 1, len(impulse))
        if reverb_start < len(impulse):
            tail_length = len(impulse) - reverb_start
            decay = np.exp(-np.linspace(0, 8, tail_length))
            impulse[reverb_start:] = decay * 0.3
        # Normalize the impulse response
        if np.max(np.abs(impulse)) > 0:
            impulse = impulse / np.max(np.abs(impulse))
        try:
            y_reverb = signal.convolve(y, impulse, mode='same')
            # Rescale the wet signal to the dry signal's peak to avoid clipping
            if np.max(np.abs(y_reverb)) > 0:
                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
            blend_factor = 0.08 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        # BUG FIX: narrowed from a bare `except:` (see add_pitch_variations).
        except Exception:
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Add tape-style soft saturation plus a gentle 80 Hz high-pass.

        The tanh stage rounds off peaks; the filtered/saturated copy is
        blended at up to 10%.  At intensity 0 this returns ``y`` unchanged.
        """
        # Soft-clipping saturation (tanh), normalised back to unity gain
        saturation_amount = 1.0 + 0.3 * intensity
        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
        try:
            # NOTE: this is an 80 Hz HIGH-PASS — it trims sub-bass rumble;
            # the original comment ("low-end boost") described the opposite.
            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
            y_warm = signal.sosfilt(sos, y_saturated)
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        # BUG FIX: narrowed from a bare `except:` (see add_pitch_variations).
        except Exception:
            return y_saturated

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Break up machine-perfect dynamics with amplitude modulation.

        Applies a slow sinusoidal LFO (0.3-0.7 Hz, depth <= 3%) plus
        per-sample random gain jitter (sigma <= 1%).  Output has the same
        shape as ``y``; nondeterministic via the global NumPy RNG.
        """
        t = np.linspace(0, len(y) / sr, len(y))
        # Low-frequency amplitude modulation
        lfo_rate = 0.3 + 0.4 * intensity  # Hz
        lfo_depth = 0.03 * intensity
        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
        # Random per-sample micro-variations
        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
        return y * (amplitude_variation * random_variation)
def humanize_song(input_mp3, intensity):
    """Gradio callback: humanize an uploaded song and return the result.

    Args:
        input_mp3: Filepath of the uploaded audio (from ``gr.Audio`` with
            ``type="filepath"``), or None if nothing was uploaded.
        intensity: Humanization strength forwarded to ``AIHumanizer``.

    Returns:
        Tuple ``(output_path, status_message)`` — ``output_path`` is the
        saved WAV filepath, or None on failure.
    """
    if input_mp3 is None:
        return None, "Please upload an audio file"
    humanizer = AIHumanizer()
    try:
        print("Starting humanization process...")
        # Process the entire song to remove AI artifacts
        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")
        # BUG FIX: tempfile.mktemp is deprecated and race-prone (the name
        # can be claimed by another process before we create the file).
        # mkstemp creates the file atomically; close the fd and let
        # soundfile reopen the path.  The file persists so Gradio can
        # serve it to the browser.
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)
        # soundfile expects (samples, channels); our data is (channels, samples)
        if len(audio_data.shape) > 1:
            audio_data = audio_data.T
        sf.write(output_path, audio_data, sr)
        print(f"Audio saved successfully to: {output_path}")
        return output_path, "β Song humanized! AI artifacts removed and human feel added."
    except Exception as e:
        error_msg = f"β Error: {str(e)}"
        print(error_msg)
        return None, error_msg
# Simple and reliable interface
# --- Gradio UI -------------------------------------------------------------
# Two-column layout: upload + strength slider on the left, processed audio
# and status on the right, with an explanatory accordion underneath.
# NOTE(review): the Markdown/status strings below contain mis-encoded emoji
# bytes (e.g. "π΅", "β") — likely mojibake from a copy/paste; left
# byte-identical here since they are runtime strings.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
# π΅ AI Song Humanizer
**Remove AI Detection - Make Your Songs Sound Human-Made**
*Upload your AI-generated song β Remove robotic artifacts β Download natural-sounding version*
""")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            # Filepath mode: the callback receives a path string, not raw audio.
            input_audio = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload your complete AI-generated song",
                editable=True
            )
            gr.Markdown("### 2. Humanization Strength")
            # Slider value is passed straight through as the `intensity` arg.
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add",
                info="Lower = subtle, Higher = more natural/organic"
            )
            process_btn = gr.Button(
                "πΉ Humanize This Song",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            # Read-only: populated by the callback's returned filepath.
            output_audio = gr.Audio(
                label="Your Human-Sounding Song",
                type="filepath",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )
    with gr.Accordion("π‘ How It Works", open=True):
        gr.Markdown("""
**This tool processes your EXISTING song to remove AI characteristics:**
β **Keeps Everything Original:**
- Your complete song structure
- All vocals and instruments
- Melody and arrangement
- Everything you created
ποΈ **Removes AI Artifacts:**
- Robotic/metallic frequencies
- Perfect digital quantization
- Sterile, artificial sound
- AI-generated frequency patterns
π΅ **Adds Human Elements:**
- Natural timing variations
- Subtle pitch fluctuations
- Room ambiance and warmth
- Analog-style character
**Result:** Your same song, but it sounds like humans performed it!
""")
    # Wire the button to the processing function defined above.
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the Gradio console during development.
    demo.launch(debug=True)