# Scraped from a Hugging Face Space file view (file size: 12,011 bytes).
# Page chrome, per-line commit hashes and the line-number gutter were removed.
import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal
import os
class AIHumanizer:
    """Chain of subtle audio effects meant to make a recording sound less synthetic.

    Each channel goes through filtering, timing and pitch wobble, a short
    reverb, soft saturation and amplitude modulation. Every stage is scaled by
    ``intensity``. Several stages draw from the global ``np.random`` state, so
    output differs between calls and between the channels of one file.
    """

    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and run every channel through ``process_channel``.

        Args:
            audio_path: Path of the audio file to load.
            intensity: Strength applied to every processing stage.

        Returns:
            ``(audio, sr)`` where ``audio`` is a 2-D array of shape
            ``(channels, samples)`` (mono input yields one row) and ``sr`` is
            the file's native sample rate.

        Raises:
            Exception: If loading or processing fails; the original error is
                chained as ``__cause__``.
        """
        try:
            print(f"Loading audio from: {audio_path}")
            # Keep the native sample rate and channel layout.
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            # Samples live on the last axis; len(y) would be the channel
            # count for multi-channel input.
            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={y.shape[-1]/sr:.2f}s")

            if y.ndim > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i, channel in enumerate(y):
                    print(f"Processing channel {i+1}...")
                    processed_channels.append(
                        self.process_channel(channel, sr, intensity)
                    )
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = self.process_channel(y, sr, intensity)
                # Always hand back a 2-D (channels, samples) array.
                y_processed = np.array([y_processed])

            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            raise Exception(f"Humanization failed: {str(e)}") from e

    def process_channel(self, y, sr, intensity):
        """Run one 1-D channel through all six stages in order and return it."""
        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
        # 1. Reduce robotic frequencies
        y_processed = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y_processed = self.add_timing_variations(y_processed, sr, intensity)
        # 3. Add pitch variations
        y_processed = self.add_pitch_variations(y_processed, sr, intensity)
        # 4. Add room ambiance
        y_processed = self.add_room_ambiance(y_processed, sr, intensity)
        # 5. Add analog warmth
        y_processed = self.add_analog_warmth(y_processed, sr, intensity)
        # 6. Reduce perfect quantization
        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)
        return y_processed

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Blend in a 1.9-6.1 kHz band-stop filtered copy of the signal.

        The filtered copy is mixed at ``0.3 * intensity``. The input is
        returned unchanged when the sample rate is too low for the filter.
        """
        low_hz, high_hz = 1900, 6100
        # butter() needs both edges strictly below Nyquist (sr / 2); anything
        # lower would raise ValueError, so skip the stage instead.
        if high_hz >= sr / 2:
            return y
        sos = signal.butter(4, [low_hz, high_hz], 'bandstop', fs=sr, output='sos')
        y_filtered = signal.sosfilt(sos, y)
        wet = intensity * 0.3
        return y * (1 - wet) + y_filtered * wet

    def add_timing_variations(self, y, sr, intensity):
        """Resample each 2-second segment by a small random speed factor.

        Every segment keeps its original length, so the overall duration is
        unchanged. Skipped when ``intensity < 0.1``.
        """
        if intensity < 0.1:
            return y

        segment_size = int(sr * 2.0)  # 2-second segments
        segments = []
        for start in range(0, len(y), segment_size):
            segment = y[start:start + segment_size]
            n = len(segment)
            if n <= 100:  # too short to be worth resampling
                segments.append(segment)
                continue

            speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
            new_length = int(n / speed_var)
            if new_length <= 0:
                segments.append(segment)
                continue

            # Linear-interpolation resampling of the whole segment.
            new_indices = np.linspace(0, n - 1, new_length)
            segment_varied = np.interp(new_indices, np.arange(n), segment)

            if new_length > n:
                segment_varied = segment_varied[:n]
            elif new_length < n:
                # Hold the last sample rather than padding with zeros, which
                # would drop to silence at every segment boundary.
                segment_varied = np.pad(
                    segment_varied, (0, n - new_length), mode='edge'
                )
            segments.append(segment_varied)

        if segments:
            return np.concatenate(segments)
        return y

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a slightly pitch-shifted copy at ``0.15 * intensity``.

        Skipped when ``intensity < 0.2``. Best effort: if the shift fails the
        input is returned unchanged.
        """
        if intensity < 0.2:
            return y
        try:
            # bins_per_octave=24, so n_steps is in 1/24-octave units.
            n_steps = np.random.normal(0, 0.1 * intensity)
            y_shifted = librosa.effects.pitch_shift(
                y, sr=sr, n_steps=n_steps, bins_per_octave=24
            )
        except Exception as exc:
            # Deliberately best effort, but do not hide the reason or trap
            # KeyboardInterrupt/SystemExit the way a bare except would.
            print(f"Pitch variation skipped: {exc}")
            return y
        blend_factor = 0.15 * intensity
        return y * (1 - blend_factor) + y_shifted * blend_factor

    def add_room_ambiance(self, y, sr, intensity):
        """Blend in a short synthetic reverb at ``0.08 * intensity``.

        The impulse response is 200 ms long: one early reflection at 10 ms
        followed by an exponentially decaying tail. Skipped when
        ``intensity < 0.1``, when the input is empty, or when the impulse
        would be shorter than 10 samples.
        """
        if intensity < 0.1 or len(y) == 0:
            return y

        impulse_length = int(0.2 * sr)  # 200ms reverb
        if impulse_length < 10:
            return y

        impulse = np.zeros(impulse_length)
        early_reflections = int(0.01 * sr)  # 10ms
        if early_reflections < impulse_length:
            impulse[early_reflections] = 0.6

        reverb_start = min(early_reflections + 1, impulse_length)
        tail_length = impulse_length - reverb_start
        if tail_length > 0:
            impulse[reverb_start:] = np.exp(-np.linspace(0, 8, tail_length)) * 0.3

        impulse_peak = np.max(np.abs(impulse))
        if impulse_peak > 0:
            impulse = impulse / impulse_peak

        try:
            # 'full' + truncation keeps the reverb causal. mode='same' returns
            # the centre of the convolution, which would place the reflections
            # about half an impulse length before the sound causing them.
            y_reverb = signal.convolve(y, impulse, mode='full')[:len(y)]
        except ValueError as exc:
            print(f"Room ambiance skipped: {exc}")
            return y

        # Match the wet peak to the dry peak so the blend cannot add gain.
        wet_peak = np.max(np.abs(y_reverb))
        if wet_peak > 0:
            y_reverb = y_reverb / wet_peak * np.max(np.abs(y))

        blend_factor = 0.08 * intensity
        return y * (1 - blend_factor) + y_reverb * blend_factor

    def add_analog_warmth(self, y, sr, intensity):
        """Blend in a soft-clipped, 80 Hz high-passed copy at ``0.1 * intensity``.

        If the filter cannot be built or applied, the soft-clipped signal is
        returned on its own, without blending.
        """
        # Soft clipping saturation
        saturation_amount = 1.0 + 0.3 * intensity
        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
        try:
            # 2nd-order high-pass at 80 Hz: this attenuates content below
            # 80 Hz in the saturated copy; it is not a low-end boost.
            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
            y_warm = signal.sosfilt(sos, y_saturated)
        except ValueError:
            # e.g. sr <= 160, where 80 Hz is not below Nyquist.
            return y_saturated
        blend_factor = 0.1 * intensity
        return y * (1 - blend_factor) + y_warm * blend_factor

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Apply a slow sinusoidal gain wobble plus per-sample random gain jitter.

        The wobble runs at ``0.3 + 0.4 * intensity`` Hz with depth
        ``0.03 * intensity``; the jitter has standard deviation
        ``0.01 * intensity``. With ``intensity == 0`` the signal is unchanged.
        """
        n = len(y)
        # Sample k sits at k / sr seconds. An endpoint-inclusive linspace
        # would stretch the step to n / (sr * (n - 1)).
        t = np.arange(n) / sr

        lfo_rate = 0.3 + 0.4 * intensity  # Hz
        lfo_depth = 0.03 * intensity
        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth

        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, n)

        return y * (amplitude_variation * random_variation)
def humanize_song(input_mp3, intensity):
    """Humanize an uploaded audio file and save the result as a WAV file.

    Args:
        input_mp3: Path of the uploaded audio file, or ``None`` if nothing
            was uploaded.
        intensity: Processing strength passed to ``AIHumanizer.humanize_audio``.

    Returns:
        ``(output_path, status_message)``. ``output_path`` is ``None`` when no
        file was supplied or when processing failed; failures are reported in
        the status message rather than raised.
    """
    if input_mp3 is None:
        return None, "Please upload an audio file"

    humanizer = AIHumanizer()
    try:
        print("Starting humanization process...")
        # Process the entire song to remove AI artifacts
        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")

        # Save as WAV (more reliable than MP3). The file is created atomically
        # here; tempfile.mktemp only returns a name and is open to a race.
        with tempfile.NamedTemporaryFile(suffix='_humanized.wav', delete=False) as tmp:
            output_path = tmp.name

        # The array is (channels, samples); transpose it to
        # (samples, channels) before writing.
        if audio_data.ndim > 1:
            audio_data = audio_data.T
        sf.write(output_path, audio_data, sr)

        print(f"Audio saved successfully to: {output_path}")
        return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        error_msg = f"❌ Error: {str(e)}"
        print(error_msg)
        return None, error_msg
# Simple and reliable interface: upload + strength slider on the left,
# processed audio + status text on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
    # 🎵 AI Song Humanizer

    **Remove AI Detection - Make Your Songs Sound Human-Made**

    *Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload your complete AI-generated song",
                editable=True
            )

            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add",
                info="Lower = subtle, Higher = more natural/organic"
            )

            process_btn = gr.Button(
                "🎹 Humanize This Song",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                label="Your Human-Sounding Song",
                type="filepath",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )

    with gr.Accordion("💡 How It Works", open=True):
        gr.Markdown("""
        **This tool processes your EXISTING song to remove AI characteristics:**

        ✅ **Keeps Everything Original:**
        - Your complete song structure
        - All vocals and instruments
        - Melody and arrangement
        - Everything you created

        🎛️ **Removes AI Artifacts:**
        - Robotic/metallic frequencies
        - Perfect digital quantization
        - Sterile, artificial sound
        - AI-generated frequency patterns

        🎵 **Adds Human Elements:**
        - Natural timing variations
        - Subtle pitch fluctuations
        - Room ambiance and warmth
        - Analog-style character

        **Result:** Your same song, but it sounds like humans performed it!
        """)

    # Wire the button: (file path, intensity) in, (output path, status) out.
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)