HumanizeBot / app.py
FILMITO's picture
Update app.py
20ef8ef verified
raw
history blame
12 kB
import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal
import os
class AIHumanizer:
    """Apply a chain of subtle DSP effects meant to make audio sound less synthetic.

    Every stage operates on one 1-D channel and blends its effect with the
    incoming signal in proportion to ``intensity``. Stages that draw random
    numbers accept an optional ``rng``: any object exposing
    ``normal(loc, scale, size)`` (for example ``numpy.random.default_rng(seed)``).
    When ``rng`` is omitted they use NumPy's global random state.
    """

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and run every channel through the effect chain.

        Args:
            audio_path: Path of the audio file to load.
            intensity: Effect strength passed to every processing stage.

        Returns:
            ``(y_processed, sr)`` where ``y_processed`` is a 2-D array with one
            row per channel (mono input yields a single row) and ``sr`` is the
            sample rate reported by the loader.

        Raises:
            RuntimeError: If loading or processing fails; the original
                exception is attached as ``__cause__``.
        """
        try:
            print(f"Loading audio from: {audio_path}")
            # Load the full song - handle both mono and stereo
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            # Samples are on the last axis for both the 1-D and the 2-D layout,
            # so len(y) would report the channel count for stereo input.
            duration = y.shape[-1] / sr
            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={duration:.2f}s")

            is_multichannel = y.ndim > 1
            if is_multichannel:
                print("Processing stereo audio...")
            else:
                print("Processing mono audio...")

            # One seed for all channels: each channel gets a generator that
            # produces the same random timing/pitch/amplitude draws, so the
            # channels are not warped independently of each other.
            seed = int(np.random.randint(0, 2**31 - 1))
            processed_channels = []
            for i, channel in enumerate(np.atleast_2d(y)):
                if is_multichannel:
                    print(f"Processing channel {i+1}...")
                channel_rng = np.random.default_rng(seed)
                processed_channels.append(
                    self.process_channel(channel, sr, intensity, rng=channel_rng)
                )
            # Always 2-D (channels, samples), including for mono input.
            y_processed = np.array(processed_channels)

            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            raise RuntimeError(f"Humanization failed: {str(e)}") from e

    def process_channel(self, y, sr, intensity, rng=None):
        """Run one 1-D channel through all six processing stages in order.

        Args:
            y: 1-D array of samples.
            sr: Sample rate in Hz.
            intensity: Effect strength forwarded to each stage.
            rng: Optional random source shared by the randomised stages.

        Returns:
            The processed 1-D array.
        """
        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
        # 1. Reduce robotic frequencies
        y_processed = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y_processed = self.add_timing_variations(y_processed, sr, intensity, rng=rng)
        # 3. Add pitch variations
        y_processed = self.add_pitch_variations(y_processed, sr, intensity, rng=rng)
        # 4. Add room ambiance
        y_processed = self.add_room_ambiance(y_processed, sr, intensity)
        # 5. Add analog warmth
        y_processed = self.add_analog_warmth(y_processed, sr, intensity)
        # 6. Reduce perfect quantization
        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity, rng=rng)
        return y_processed

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Attenuate the 1.9-6.1 kHz band and blend the result with the input.

        Returns ``y`` unchanged when the sample rate is too low for the filter.
        """
        low_hz, high_hz = 1900, 6100
        # Both band edges must lie strictly below Nyquist (sr / 2); otherwise
        # the filter design raises ValueError.
        if sr <= 2 * high_hz:
            return y
        sos = signal.butter(4, [low_hz, high_hz], 'bandstop', fs=sr, output='sos')
        y_filtered = signal.sosfilt(sos, y)
        # Blend with original based on intensity
        wet = intensity * 0.3
        return y * (1 - wet) + y_filtered * wet

    def add_timing_variations(self, y, sr, intensity, rng=None):
        """Resample each 2-second segment by a small random speed factor.

        Every segment keeps its original length, so the output has the same
        number of samples as the input. Returns ``y`` unchanged when
        ``intensity`` is below 0.1.
        """
        if intensity < 0.1:
            return y
        rng = np.random if rng is None else rng

        segment_size = int(sr * 2.0)  # 2-second segments
        segments = []
        for start in range(0, len(y), segment_size):
            segment = y[start:start + segment_size]
            seg_len = len(segment)
            # Very short trailing segments are passed through untouched.
            if seg_len <= 100:
                segments.append(segment)
                continue

            speed_var = 1.0 + rng.normal(0, 0.004 * intensity)
            new_length = int(seg_len / speed_var)
            if new_length <= 0:
                segments.append(segment)
                continue

            # Linear-interpolation resample of the whole segment to new_length.
            new_indices = np.linspace(0, seg_len - 1, new_length)
            varied = np.interp(new_indices, np.arange(seg_len), segment)

            # Force the segment back to its original length.
            # NOTE: a slowed-down segment loses its tail; a sped-up segment is
            # zero-padded, which leaves a short silent gap at its end.
            if new_length > seg_len:
                varied = varied[:seg_len]
            elif new_length < seg_len:
                varied = np.pad(varied, (0, seg_len - new_length))
            segments.append(varied)

        if segments:
            return np.concatenate(segments)
        return y

    def add_pitch_variations(self, y, sr, intensity, rng=None):
        """Blend in a slightly pitch-shifted copy of the signal.

        Best effort: if the pitch shift fails for any reason the input is
        returned unchanged. Returns ``y`` unchanged when ``intensity`` is
        below 0.2.
        """
        if intensity < 0.2:
            return y
        rng = np.random if rng is None else rng
        n_steps = rng.normal(0, 0.1 * intensity)
        try:
            y_shifted = librosa.effects.pitch_shift(
                y, sr=sr, n_steps=n_steps, bins_per_octave=24
            )
            # Blend with original
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception as e:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            print(f"Pitch variation skipped: {e}")
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Blend in a short synthetic room reverb (200 ms impulse response).

        The impulse response is one early reflection at 10 ms followed by an
        exponentially decaying tail. Returns ``y`` unchanged when ``intensity``
        is below 0.1, when the signal is empty, or when the sample rate is too
        low to build a meaningful impulse response.
        """
        if intensity < 0.1 or len(y) == 0:
            return y

        impulse_length = int(0.2 * sr)  # 200ms reverb
        if impulse_length < 10:
            return y
        impulse = np.zeros(impulse_length)

        # Early reflection
        early_reflections = int(0.01 * sr)  # 10ms
        if early_reflections < impulse_length:
            impulse[early_reflections] = 0.6

        # Late reverb tail
        reverb_start = min(early_reflections + 1, impulse_length)
        tail_length = impulse_length - reverb_start
        if tail_length > 0:
            impulse[reverb_start:] = np.exp(-np.linspace(0, 8, tail_length)) * 0.3

        # Normalize impulse to unit peak
        impulse_peak = np.max(np.abs(impulse))
        if impulse_peak > 0:
            impulse = impulse / impulse_peak

        try:
            # Take the head of the full convolution so the response stays
            # causal. mode='same' would return the centred part, moving every
            # reflection roughly half the impulse length *before* its source.
            y_reverb = signal.convolve(y, impulse, mode='full')[:len(y)]
            # Match the wet peak to the dry peak to prevent clipping
            wet_peak = np.max(np.abs(y_reverb))
            if wet_peak > 0:
                y_reverb = y_reverb / wet_peak * np.max(np.abs(y))
            # Blend with original
            blend_factor = 0.08 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception as e:
            print(f"Room ambiance skipped: {e}")
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Blend in a soft-clipped, high-pass-filtered copy of the signal.

        If the 80 Hz filter cannot be designed for this sample rate, the
        soft-clipped signal alone is returned.
        """
        # Soft clipping saturation
        saturation_amount = 1.0 + 0.3 * intensity
        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
        try:
            # 2nd-order high-pass at 80 Hz applied to the saturated signal
            # (this removes content below 80 Hz; it does not boost the low end).
            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
            y_warm = signal.sosfilt(sos, y_saturated)
        except ValueError:
            # Raised by the filter design when 80 Hz is not below Nyquist.
            return y_saturated
        # Blend
        blend_factor = 0.1 * intensity
        return y * (1 - blend_factor) + y_warm * blend_factor

    def reduce_perfect_quantization(self, y, sr, intensity, rng=None):
        """Apply a slow sinusoidal gain wobble plus per-sample random gain jitter.

        With ``intensity == 0`` both modulations vanish and the output equals
        the input.
        """
        rng = np.random if rng is None else rng
        # Time of each sample in seconds (sample k sits at k / sr).
        t = np.arange(len(y)) / sr
        # Low-frequency amplitude modulation
        lfo_rate = 0.3 + 0.4 * intensity  # Hz
        lfo_depth = 0.03 * intensity
        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
        # Random micro-variations
        random_variation = 1.0 + rng.normal(0, 0.01 * intensity, len(y))
        # Combine variations
        return y * (amplitude_variation * random_variation)
def humanize_song(input_mp3, intensity):
    """Humanize an uploaded audio file and write the result to a temporary WAV.

    Args:
        input_mp3: Path of the uploaded audio file, or ``None`` if nothing
            was uploaded.
        intensity: Effect strength forwarded to ``AIHumanizer.humanize_audio``.

    Returns:
        ``(output_path, status_message)``. ``output_path`` is ``None`` when no
        file was supplied or when processing failed; the message then explains
        why.
    """
    if input_mp3 is None:
        return None, "Please upload an audio file"

    humanizer = AIHumanizer()
    try:
        print("Starting humanization process...")
        # Process the entire song to remove AI artifacts
        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")

        # The humanizer returns (channels, samples); transpose so samples run
        # along the first axis for soundfile.
        if audio_data.ndim > 1:
            audio_data = audio_data.T

        # The gain stages can push peaks slightly above full scale. Scale down
        # only in that case so the written file is not clipped.
        # NOTE(review): assumes the WAV is written with soundfile's default
        # integer PCM subtype - confirm if the subtype is ever changed.
        peak = float(np.max(np.abs(audio_data))) if audio_data.size else 0.0
        if peak > 1.0:
            audio_data = audio_data / peak

        # Save as WAV (more reliable than MP3). mkstemp creates the file
        # atomically, unlike the deprecated, race-prone mktemp.
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)
        try:
            sf.write(output_path, audio_data, sr)
        except Exception:
            # Do not leave an empty temp file behind when the write fails.
            if os.path.exists(output_path):
                os.remove(output_path)
            raise

        print(f"Audio saved successfully to: {output_path}")
        return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        error_msg = f"❌ Error: {str(e)}"
        print(error_msg)
        return None, error_msg
# Simple and reliable interface.
# Markdown bodies are kept flush-left inside the triple-quoted strings so that
# no line is indented enough to be rendered as a code block, and blank lines
# separate paragraphs from lists so each section header renders on its own.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
# 🎵 AI Song Humanizer

**Remove AI Detection - Make Your Songs Sound Human-Made**

*Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
""")

    with gr.Row():
        # Left column: input file, strength slider and the action button.
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload your complete AI-generated song",
                editable=True
            )
            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add",
                info="Lower = subtle, Higher = more natural/organic"
            )
            process_btn = gr.Button(
                "🎹 Humanize This Song",
                variant="primary",
                size="lg"
            )

        # Right column: processed audio and a status line.
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                label="Your Human-Sounding Song",
                type="filepath",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )

    with gr.Accordion("💡 How It Works", open=True):
        gr.Markdown("""
**This tool processes your EXISTING song to remove AI characteristics:**

✅ **Keeps Everything Original:**

- Your complete song structure
- All vocals and instruments
- Melody and arrangement
- Everything you created

🎛️ **Removes AI Artifacts:**

- Robotic/metallic frequencies
- Perfect digital quantization
- Sterile, artificial sound
- AI-generated frequency patterns

🎵 **Adds Human Elements:**

- Natural timing variations
- Subtle pitch fluctuations
- Room ambiance and warmth
- Analog-style character

**Result:** Your same song, but it sounds like humans performed it!
""")

    # Processing function: the button feeds the uploaded file path and slider
    # value to humanize_song and shows its (audio path, status) result.
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )

if __name__ == "__main__":
    demo.launch(debug=True)