# AI Song Humanizer — Gradio app (Hugging Face Space)
| import gradio as gr | |
| import numpy as np | |
| import tempfile | |
| import librosa | |
| import soundfile as sf | |
| from scipy import signal | |
| import os | |
class AIHumanizer:
    """Post-processing chain that masks telltale AI-audio artifacts.

    Each stage applies one subtle, mostly-blended transformation
    (spectral notch, micro-timing, micro-pitch, room reverb, saturation,
    amplitude modulation); ``process_channel`` runs them all in sequence
    on a single channel.  Several stages draw from ``np.random``, so the
    output is nondeterministic unless the caller seeds the global RNG.
    """

    def __init__(self):
        # Stateless for now; kept so callers can instantiate normally.
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and humanize every channel.

        Args:
            audio_path: Path to an audio file readable by librosa.
            intensity: Overall effect strength, roughly in [0, 1].

        Returns:
            Tuple ``(y_processed, sr)`` where ``y_processed`` is a 2-D
            array shaped (channels, samples) and ``sr`` is the native
            sample rate of the file.

        Raises:
            Exception: wraps any loading/processing failure with context.
        """
        try:
            print(f"Loading audio from: {audio_path}")
            # Keep the native sample rate; stereo files load as (2, samples).
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            # BUG FIX: duration must come from the sample axis (y.shape[-1]).
            # For stereo input len(y) is the channel count (2), so the old
            # f-string printed a duration of ~0.00s for every stereo file.
            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={y.shape[-1]/sr:.2f}s")
            if len(y.shape) > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i, channel in enumerate(y):
                    print(f"Processing channel {i+1}...")
                    processed_channels.append(self.process_channel(channel, sr, intensity))
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = self.process_channel(y, sr, intensity)
                y_processed = np.array([y_processed])  # 2-D (1, samples) for consistency
            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            raise Exception(f"Humanization failed: {str(e)}")

    def process_channel(self, y, sr, intensity):
        """Run the full artifact-removal/humanization chain on one channel.

        Args:
            y: 1-D float array of samples.
            sr: Sample rate in Hz.
            intensity: Effect strength, roughly in [0, 1].

        Returns:
            Processed 1-D array of the same length as ``y``.
        """
        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
        # 1. Reduce robotic frequencies
        y_processed = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y_processed = self.add_timing_variations(y_processed, sr, intensity)
        # 3. Add pitch variations
        y_processed = self.add_pitch_variations(y_processed, sr, intensity)
        # 4. Add room ambiance
        y_processed = self.add_room_ambiance(y_processed, sr, intensity)
        # 5. Add analog warmth
        y_processed = self.add_analog_warmth(y_processed, sr, intensity)
        # 6. Reduce perfect quantization
        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)
        return y_processed

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Notch out harsh 1.9-6.1 kHz content common in AI renders.

        The band-stopped signal is blended at up to 30% (scaled by
        ``intensity``) so the timbre is softened rather than gutted.
        Returns ``y`` unchanged when the sample rate is too low to filter.
        """
        # BUG FIX: signal.butter requires both cutoffs strictly below the
        # Nyquist frequency (sr/2).  The old guard (sr > 4000) crashed with
        # ValueError for any sample rate in (4000, 12200]; require
        # sr > 2 * 6100 so the 6100 Hz upper edge is always valid.
        if sr > 2 * 6100:
            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
            y_filtered = signal.sosfilt(sos, y)
            # Blend with original based on intensity
            mix = intensity * 0.3
            return y * (1 - mix) + y_filtered * mix
        return y

    def add_timing_variations(self, y, sr, intensity):
        """Add subtle per-segment speed wobble (~±0.4% max).

        Works on 2-second segments: each is resampled by a small random
        factor, then trimmed/zero-padded back to its original length so
        the total duration is preserved.  No-op below intensity 0.1.
        """
        if intensity < 0.1:
            return y
        segment_size = int(sr * 2.0)  # 2-second segments
        segments = []
        for i in range(0, len(y), segment_size):
            segment = y[i:i + segment_size]
            if len(segment) > 100:  # skip fragments too short to warp
                # Small random speed variation around 1.0
                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
                new_length = int(len(segment) / speed_var)
                if new_length > 0 and len(segment) > 0:
                    # Linear-interp resampling for the timing variation
                    original_indices = np.arange(len(segment))
                    new_indices = np.linspace(0, len(segment) - 1, new_length)
                    segment_varied = np.interp(new_indices, original_indices, segment)
                    # Restore the original segment length exactly
                    if len(segment_varied) != len(segment):
                        if len(segment_varied) > len(segment):
                            segment_varied = segment_varied[:len(segment)]
                        else:
                            segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))
                    segments.append(segment_varied)
                else:
                    segments.append(segment)
            else:
                segments.append(segment)
        if segments:
            return np.concatenate(segments)
        return y

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a slightly pitch-shifted copy (random, < ~0.1 semitone).

        No-op below intensity 0.2; returns ``y`` unchanged if the shift
        fails for any reason (best-effort stage).
        """
        if intensity < 0.2:
            return y
        try:
            # Random shift in fractional semitones, centred on 0
            n_steps = np.random.normal(0, 0.1 * intensity)
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps, bins_per_octave=24)
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # catch Exception so the best-effort fallback stays interruptible.
        except Exception:
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Convolve with a tiny synthetic room impulse and blend it in.

        The impulse has one early reflection at 10 ms and an exponentially
        decaying 200 ms tail; the reverberant copy is blended at up to 8%.
        No-op below intensity 0.1 or when the impulse would be degenerate.
        """
        if intensity < 0.1:
            return y
        impulse_length = int(0.2 * sr)  # 200 ms reverb tail
        if impulse_length < 10:
            return y
        impulse = np.zeros(impulse_length)
        # Single early reflection at ~10 ms
        early_reflections = int(0.01 * sr)
        if early_reflections < len(impulse):
            impulse[early_reflections] = 0.6
        # Exponentially decaying late-reverb tail
        reverb_start = min(early_reflections + 1, len(impulse))
        if reverb_start < len(impulse):
            tail_length = len(impulse) - reverb_start
            decay = np.exp(-np.linspace(0, 8, tail_length))
            impulse[reverb_start:] = decay * 0.3
        # Normalize the impulse response
        if np.max(np.abs(impulse)) > 0:
            impulse = impulse / np.max(np.abs(impulse))
        try:
            y_reverb = signal.convolve(y, impulse, mode='same')
            # Rescale the wet signal to the dry signal's peak to avoid clipping
            if np.max(np.abs(y_reverb)) > 0:
                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
            blend_factor = 0.08 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        # BUG FIX: narrowed from a bare `except:` (see add_pitch_variations).
        except Exception:
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Add tape-style soft saturation plus a gentle 80 Hz high-pass.

        The tanh stage rounds off peaks; the filtered/saturated copy is
        blended at up to 10%.  At intensity 0 this returns ``y`` unchanged.
        """
        # Soft-clipping saturation (tanh), normalised back to unity gain
        saturation_amount = 1.0 + 0.3 * intensity
        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
        try:
            # NOTE: this is an 80 Hz HIGH-PASS — it trims sub-bass rumble;
            # the original comment ("low-end boost") described the opposite.
            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
            y_warm = signal.sosfilt(sos, y_saturated)
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        # BUG FIX: narrowed from a bare `except:` (see add_pitch_variations).
        except Exception:
            return y_saturated

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Break up machine-perfect dynamics with amplitude modulation.

        Applies a slow sinusoidal LFO (0.3-0.7 Hz, depth <= 3%) plus
        per-sample random gain jitter (sigma <= 1%).  Output has the same
        shape as ``y``; nondeterministic via the global NumPy RNG.
        """
        t = np.linspace(0, len(y) / sr, len(y))
        # Low-frequency amplitude modulation
        lfo_rate = 0.3 + 0.4 * intensity  # Hz
        lfo_depth = 0.03 * intensity
        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
        # Random per-sample micro-variations
        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
        return y * (amplitude_variation * random_variation)
def humanize_song(input_mp3, intensity):
    """Gradio callback: humanize an uploaded song and return the result.

    Args:
        input_mp3: Filepath of the uploaded audio (from ``gr.Audio`` with
            ``type="filepath"``), or None if nothing was uploaded.
        intensity: Humanization strength forwarded to ``AIHumanizer``.

    Returns:
        Tuple ``(output_path, status_message)`` — ``output_path`` is the
        saved WAV filepath, or None on failure.
    """
    if input_mp3 is None:
        return None, "Please upload an audio file"
    humanizer = AIHumanizer()
    try:
        print("Starting humanization process...")
        # Process the entire song to remove AI artifacts
        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")
        # BUG FIX: tempfile.mktemp is deprecated and race-prone (the name
        # can be claimed by another process before we create the file).
        # mkstemp creates the file atomically; close the fd and let
        # soundfile reopen the path.  The file persists so Gradio can
        # serve it to the browser.
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)
        # soundfile expects (samples, channels); our data is (channels, samples)
        if len(audio_data.shape) > 1:
            audio_data = audio_data.T
        sf.write(output_path, audio_data, sr)
        print(f"Audio saved successfully to: {output_path}")
        return output_path, "β Song humanized! AI artifacts removed and human feel added."
    except Exception as e:
        error_msg = f"β Error: {str(e)}"
        print(error_msg)
        return None, error_msg
# Simple and reliable interface
# --- Gradio UI -------------------------------------------------------------
# Two-column layout: upload + strength slider on the left, processed audio
# and status on the right, with an explanatory accordion underneath.
# NOTE(review): the Markdown/status strings below contain mis-encoded emoji
# bytes (e.g. "π΅", "β") — likely mojibake from a copy/paste; left
# byte-identical here since they are runtime strings.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
# π΅ AI Song Humanizer
**Remove AI Detection - Make Your Songs Sound Human-Made**
*Upload your AI-generated song β Remove robotic artifacts β Download natural-sounding version*
""")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            # Filepath mode: the callback receives a path string, not raw audio.
            input_audio = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload your complete AI-generated song",
                editable=True
            )
            gr.Markdown("### 2. Humanization Strength")
            # Slider value is passed straight through as the `intensity` arg.
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add",
                info="Lower = subtle, Higher = more natural/organic"
            )
            process_btn = gr.Button(
                "πΉ Humanize This Song",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            # Read-only: populated by the callback's returned filepath.
            output_audio = gr.Audio(
                label="Your Human-Sounding Song",
                type="filepath",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )
    with gr.Accordion("π‘ How It Works", open=True):
        gr.Markdown("""
**This tool processes your EXISTING song to remove AI characteristics:**
β **Keeps Everything Original:**
- Your complete song structure
- All vocals and instruments
- Melody and arrangement
- Everything you created
ποΈ **Removes AI Artifacts:**
- Robotic/metallic frequencies
- Perfect digital quantization
- Sterile, artificial sound
- AI-generated frequency patterns
π΅ **Adds Human Elements:**
- Natural timing variations
- Subtle pitch fluctuations
- Room ambiance and warmth
- Analog-style character
**Result:** Your same song, but it sounds like humans performed it!
""")
    # Wire the button to the processing function defined above.
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the Gradio console during development.
    demo.launch(debug=True)