"""AI Song Humanizer.

Gradio app that post-processes AI-generated songs to reduce telltale
AI-render artifacts and layer in subtle "human" imperfections:
timing drift, pitch fluctuation, room ambiance, analog-style warmth,
and amplitude variation.
"""

import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal
import os


class AIHumanizer:
    """Applies a chain of subtle DSP effects to make audio sound less synthetic."""

    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Remove AI artifacts and make audio sound human-made.

        Parameters
        ----------
        audio_path : str
            Path to the input audio file (any format librosa can decode).
        intensity : float
            Effect strength, roughly in [0, 1]; higher = more processing.

        Returns
        -------
        tuple[np.ndarray, int]
            Processed audio as a 2-D ``(channels, samples)`` array plus the
            native sample rate.

        Raises
        ------
        Exception
            Wraps any underlying failure with a "Humanization failed" message.
        """
        try:
            print(f"Loading audio from: {audio_path}")
            # Load the full song at its native sample rate; keep stereo if present.
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            # NOTE: use the last axis for duration — len(y) on a stereo
            # (channels, samples) array is the channel count, not samples.
            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={y.shape[-1]/sr:.2f}s")

            if len(y.shape) > 1:
                # Stereo (or multi-channel): process each channel independently.
                print("Processing stereo audio...")
                processed_channels = []
                for i, channel in enumerate(y):
                    print(f"Processing channel {i+1}...")
                    processed_channel = self.process_channel(channel, sr, intensity)
                    processed_channels.append(processed_channel)
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = self.process_channel(y, sr, intensity)
                y_processed = np.array([y_processed])  # Make it 2D for consistency

            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            raise Exception(f"Humanization failed: {str(e)}")

    def process_channel(self, y, sr, intensity):
        """Run a single audio channel through the full humanization chain.

        The six stages are applied in a fixed order: artifact filtering,
        timing variation, pitch variation, room ambiance, analog warmth,
        and amplitude de-quantization.
        """
        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
        # 1. Reduce robotic frequencies
        y_processed = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y_processed = self.add_timing_variations(y_processed, sr, intensity)
        # 3. Add pitch variations
        y_processed = self.add_pitch_variations(y_processed, sr, intensity)
        # 4. Add room ambiance
        y_processed = self.add_room_ambiance(y_processed, sr, intensity)
        # 5. Add analog warmth
        y_processed = self.add_analog_warmth(y_processed, sr, intensity)
        # 6. Reduce perfect quantization
        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)
        return y_processed

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Attenuate harsh 2-6 kHz content (a common AI-render artifact).

        Applies a 1900-6100 Hz band-stop filter and blends up to 30% of the
        filtered signal back in, scaled by ``intensity``.
        """
        # BUGFIX: butter() raises ValueError unless the upper band edge
        # (6100 Hz) is below Nyquist, so sr must exceed 2 * 6100 — the
        # previous `sr > 4000` guard let sample rates in (4000, 12200]
        # crash the filter design.
        if sr > 2 * 6100:
            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
            y_filtered = signal.sosfilt(sos, y)
            # Blend with original based on intensity
            y_processed = y * (1 - intensity * 0.3) + y_filtered * (intensity * 0.3)
            return y_processed
        return y

    def add_timing_variations(self, y, sr, intensity):
        """Add subtle random speed variations over 2-second segments.

        Each segment is resampled by a tiny random factor, then padded or
        trimmed back to its original length so the total duration is kept.
        """
        if intensity < 0.1:
            return y
        segment_size = int(sr * 2.0)  # 2-second segments
        segments = []
        for i in range(0, len(y), segment_size):
            segment = y[i:i + segment_size]
            if len(segment) > 100:  # Only process if segment is long enough
                # Small speed variation (std dev ~0.4% at full intensity)
                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
                new_length = int(len(segment) / speed_var)
                if new_length > 0 and len(segment) > 0:
                    # Simple linear-interp resampling for the timing wobble
                    original_indices = np.arange(len(segment))
                    new_indices = np.linspace(0, len(segment) - 1, new_length)
                    segment_varied = np.interp(new_indices, original_indices, segment)
                    # Restore the original segment length so boundaries line up
                    if len(segment_varied) != len(segment):
                        if len(segment_varied) > len(segment):
                            segment_varied = segment_varied[:len(segment)]
                        else:
                            segment_varied = np.pad(
                                segment_varied,
                                (0, len(segment) - len(segment_varied)))
                    segments.append(segment_varied)
                else:
                    segments.append(segment)
            else:
                segments.append(segment)
        if segments:
            return np.concatenate(segments)
        return y

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a slightly pitch-shifted copy for natural pitch drift.

        Best-effort: returns the input unchanged if pitch shifting fails.
        """
        if intensity < 0.2:
            return y
        try:
            # Random shift of a few cents (std dev 0.1 semitone at full intensity)
            n_steps = np.random.normal(0, 0.1 * intensity)
            y_shifted = librosa.effects.pitch_shift(
                y, sr=sr, n_steps=n_steps, bins_per_octave=24)
            # Blend with original
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception:
            # Deliberate best-effort fallback: skip the stage on failure.
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Convolve with a synthetic 200 ms impulse response for room feel.

        The impulse has one early reflection at ~10 ms plus an exponentially
        decaying tail; the reverberant signal is blended in at up to 8%.
        """
        if intensity < 0.1:
            return y
        impulse_length = int(0.2 * sr)  # 200ms reverb
        if impulse_length < 10:
            return y
        impulse = np.zeros(impulse_length)
        # Early reflections
        early_reflections = int(0.01 * sr)  # 10ms
        if early_reflections < len(impulse):
            impulse[early_reflections] = 0.6
        # Late reverb tail
        reverb_start = min(early_reflections + 1, len(impulse))
        if reverb_start < len(impulse):
            tail_length = len(impulse) - reverb_start
            decay = np.exp(-np.linspace(0, 8, tail_length))
            impulse[reverb_start:] = decay * 0.3
        # Normalize impulse
        if np.max(np.abs(impulse)) > 0:
            impulse = impulse / np.max(np.abs(impulse))
        # Apply convolution (best-effort: skip stage on failure)
        try:
            y_reverb = signal.convolve(y, impulse, mode='same')
            # Rescale the wet signal to the dry peak to prevent clipping
            if np.max(np.abs(y_reverb)) > 0:
                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
            # Blend with original
            blend_factor = 0.08 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception:
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Add analog-style warmth via soft saturation and gentle EQ."""
        # Soft clipping saturation (tanh keeps peaks bounded)
        saturation_amount = 1.0 + 0.3 * intensity
        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
        try:
            # 80 Hz highpass on the saturated copy to clear sub-bass rumble
            # before blending. (NOTE: this is a highpass — it removes content
            # below 80 Hz; it is not a low-end boost.)
            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
            y_warm = signal.sosfilt(sos, y_saturated)
            # Blend
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        except Exception:
            # Fall back to the plain saturated signal if filter design fails
            # (e.g. sr too low for the 80 Hz corner).
            return y_saturated

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Break up perfectly uniform loudness with amplitude variations.

        Combines a slow sinusoidal LFO (0.3-0.7 Hz) with per-sample random
        micro-variations.
        """
        t = np.linspace(0, len(y) / sr, len(y))
        # Low-frequency amplitude modulation
        lfo_rate = 0.3 + 0.4 * intensity  # Hz
        lfo_depth = 0.03 * intensity
        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
        # Random micro-variations
        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
        # Combine variations
        total_variation = amplitude_variation * random_variation
        return y * total_variation


def humanize_song(input_mp3, intensity):
    """Gradio callback: humanize an uploaded song and return (path, status).

    Returns ``(None, error_message)`` on any failure so the UI shows the
    problem instead of crashing.
    """
    if input_mp3 is None:
        return None, "Please upload an audio file"
    humanizer = AIHumanizer()
    try:
        print("Starting humanization process...")
        # Process the entire song to remove AI artifacts
        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")
        # Save as WAV (more reliable than MP3). mkstemp replaces the
        # deprecated, race-prone tempfile.mktemp.
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)  # soundfile reopens the path itself
        # soundfile expects (samples, channels), so transpose our (channels, samples)
        if len(audio_data.shape) > 1:
            audio_data = audio_data.T
        sf.write(output_path, audio_data, sr)
        print(f"Audio saved successfully to: {output_path}")
        return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        print(error_msg)
        return None, error_msg


# Simple and reliable interface
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
    # 🎵 AI Song Humanizer
    **Remove AI Detection - Make Your Songs Sound Human-Made**

    *Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload your complete AI-generated song",
                editable=True
            )
            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0,
                value=0.7,
                label="How much human feel to add",
                info="Lower = subtle, Higher = more natural/organic"
            )
            process_btn = gr.Button(
                "🎹 Humanize This Song",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                label="Your Human-Sounding Song",
                type="filepath",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=3
            )

    with gr.Accordion("💡 How It Works", open=True):
        gr.Markdown("""
        **This tool processes your EXISTING song to remove AI characteristics:**

        ✅ **Keeps Everything Original:**
        - Your complete song structure
        - All vocals and instruments
        - Melody and arrangement
        - Everything you created

        🎛️ **Removes AI Artifacts:**
        - Robotic/metallic frequencies
        - Perfect digital quantization
        - Sterile, artificial sound
        - AI-generated frequency patterns

        🎵 **Adds Human Elements:**
        - Natural timing variations
        - Subtle pitch fluctuations
        - Room ambiance and warmth
        - Analog-style character

        **Result:** Your same song, but it sounds like humans performed it!
        """)

    # Processing function
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )

if __name__ == "__main__":
    demo.launch(debug=True)