import gradio as gr
import pretty_midi
import numpy as np
import tempfile
import os
import librosa
import torch
import torchaudio
from pathlib import Path
import soundfile as sf
import io


class MP3ToHumanized:
    """Convert audio files to MIDI and apply "humanizing" timing/velocity jitter.

    Three conversion strategies are offered (onset-based, melody-tracking,
    rhythm-only); the resulting MIDI is then perturbed with small random
    timing/velocity offsets so it sounds less mechanically quantized.
    """

    def __init__(self):
        # Per-instrument randomization parameters:
        #   timing_var   - std-dev (seconds) of the note-start jitter
        #   velocity_var - maximum absolute velocity offset
        #   swing_factor - reserved for future swing support (currently unused)
        self.groove_profiles = {
            "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
            "melody": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
            "bass": {"timing_var": 0.015, "velocity_var": 12, "swing_factor": 0.07},
            "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
            "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
        }

    def audio_to_midi(self, audio_path, conversion_method="basic"):
        """Convert audio file to MIDI using different methods.

        Args:
            audio_path: Path to an audio file readable by librosa.
            conversion_method: "basic" (onset + pitch), "melody" (pyin
                melody tracking), or anything else for rhythm-only.

        Returns:
            pretty_midi.PrettyMIDI: the converted MIDI data.

        Raises:
            Exception: wraps any failure during load/conversion.
        """
        try:
            # 22.05 kHz is plenty for pitch tracking and halves the work.
            y, sr = librosa.load(audio_path, sr=22050)

            if conversion_method == "basic":
                return self.basic_audio_to_midi(y, sr)
            elif conversion_method == "melody":
                return self.melody_extraction_to_midi(y, sr)
            else:
                return self.rhythm_based_midi(y, sr)
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise Exception(f"Audio to MIDI conversion failed: {str(e)}") from e

    def basic_audio_to_midi(self, y, sr):
        """Basic onset detection and pitch estimation.

        Detects note onsets, estimates one fundamental frequency per onset
        from a 500 ms window, and emits fixed-length piano notes.
        """
        midi = pretty_midi.PrettyMIDI()
        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        instrument = pretty_midi.Instrument(program=piano_program)

        # Detect onsets (when notes start).
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)

        for onset_time in onset_times:
            # Analyze a 500 ms segment starting at the onset.
            start_sample = int(onset_time * sr)
            end_sample = start_sample + int(0.5 * sr)
            if end_sample < len(y):
                segment = y[start_sample:end_sample]
                f0 = self.estimate_pitch(segment, sr)
                if f0 > 0:
                    # Round to the NEAREST semitone; a plain int() cast
                    # truncates and biases every detected pitch flat.
                    midi_note = int(round(69 + 12 * np.log2(f0 / 440.0)))
                    if 0 <= midi_note <= 127:
                        note = pretty_midi.Note(
                            velocity=np.random.randint(60, 100),
                            pitch=midi_note,
                            start=onset_time,
                            end=onset_time + 0.5,  # fixed 500 ms duration
                        )
                        instrument.notes.append(note)

        midi.instruments.append(instrument)
        return midi

    def melody_extraction_to_midi(self, y, sr):
        """Extract melody with pyin and convert it to a monophonic MIDI line."""
        midi = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(program=0)  # Piano

        # Frame-wise f0 track; NaN / unvoiced frames end the current note.
        f0, voiced_flag, voiced_probs = librosa.pyin(
            y,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=sr,
        )
        times = librosa.times_like(f0, sr=sr, hop_length=512)

        current_note = None
        note_start = 0
        last_time = 0

        for time, freq, voiced in zip(times, f0, voiced_flag):
            last_time = time
            if voiced and not np.isnan(freq):
                # Round to the nearest semitone (int() alone truncates flat).
                midi_note = int(round(69 + 12 * np.log2(freq / 440.0)))
                if 0 <= midi_note <= 127:
                    if current_note != midi_note:
                        if current_note is not None:
                            # Pitch changed: close the previous note.
                            instrument.notes.append(pretty_midi.Note(
                                velocity=80,
                                pitch=current_note,
                                start=note_start,
                                end=time,
                            ))
                        current_note = midi_note
                        note_start = time
            else:
                if current_note is not None:
                    # Voicing stopped: close the current note.
                    instrument.notes.append(pretty_midi.Note(
                        velocity=80,
                        pitch=current_note,
                        start=note_start,
                        end=time,
                    ))
                    current_note = None

        # Close a note still sounding at the end of the track — the original
        # loop silently dropped it.
        if current_note is not None:
            instrument.notes.append(pretty_midi.Note(
                velocity=80,
                pitch=current_note,
                start=note_start,
                end=last_time,
            ))

        midi.instruments.append(instrument)
        return midi

    def rhythm_based_midi(self, y, sr):
        """Create rhythm-based MIDI: a kick-drum hit on every detected beat."""
        midi = pretty_midi.PrettyMIDI()
        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)

        # Track beats; the tempo estimate itself is not needed here.
        _, beats = librosa.beat.beat_track(y=y, sr=sr)
        beat_times = librosa.frames_to_time(beats, sr=sr)

        for beat_time in beat_times:
            drum_instrument.notes.append(pretty_midi.Note(
                velocity=100,
                pitch=36,  # General MIDI kick drum
                start=beat_time,
                end=beat_time + 0.1,
            ))

        midi.instruments.append(drum_instrument)
        return midi

    def estimate_pitch(self, segment, sr):
        """Estimate fundamental frequency (Hz) of a segment via autocorrelation.

        Returns 0 when no pitch can be estimated.
        """
        try:
            # Autocorrelation; keep only non-negative lags.
            corr = np.correlate(segment, segment, mode='full')
            corr = corr[len(corr) // 2:]

            # Skip the zero-lag peak: find where the correlation first turns
            # upward, then take the strongest peak after that point.
            d = np.diff(corr)
            start = np.where(d > 0)[0]
            if len(start) > 0:
                start = start[0]
                peak = np.argmax(corr[start:]) + start
                freq = sr / peak if peak > 0 else 0
                return freq
        except Exception:
            # Estimation is best-effort; fall through to the "no pitch" result
            # rather than aborting the whole conversion. (Was a bare except.)
            pass
        return 0

    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
        """Humanize MIDI in place: jitter note timing, duration and velocity.

        Args:
            midi_data: pretty_midi.PrettyMIDI to modify.
            intensity: 0..1 scale applied to all perturbations.
            style: accepted for API compatibility; not currently used.

        Returns:
            The same (mutated) PrettyMIDI object.
        """
        for instrument in midi_data.instruments:
            inst_type = "drums" if instrument.is_drum else "melody"
            profile = self.groove_profiles[inst_type]

            for note in instrument.notes:
                # Timing: Gaussian jitter, clamped so starts never go negative.
                timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
                note.start = max(0, note.start + timing_shift)

                # Duration: smaller jitter, never for drums, min 100 ms note.
                if not instrument.is_drum:
                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
                    note.end = max(note.start + 0.1, note.end + duration_shift)

                # Velocity: uniform shift in [-var, +var]; randint's upper
                # bound is exclusive, so +1 makes +var actually reachable.
                vel_shift = np.random.randint(-profile["velocity_var"],
                                              profile["velocity_var"] + 1)
                new_velocity = note.velocity + int(vel_shift * intensity)
                note.velocity = max(20, min(127, new_velocity))

        return midi_data


def process_audio_files(files, intensity, style, conversion_method):
    """Gradio callback: convert uploads to MIDI, humanize, return results.

    Returns a (file_paths, preview_audio_path, status_message) triple.
    """
    if not files:
        return None, None, "Please upload audio files (MP3, WAV, etc.)"

    converter = MP3ToHumanized()
    processed_files = []
    path = None

    for file in files:
        # gr.File(type="filepath") yields plain path strings; older Gradio
        # versions yield objects exposing .name — support both.
        path = file if isinstance(file, str) else file.name
        try:
            midi_data = converter.audio_to_midi(path, conversion_method)
            humanized_midi = converter.humanize_midi(midi_data, intensity, style)

            # mkstemp instead of the deprecated, race-prone mktemp.
            fd, output_path = tempfile.mkstemp(suffix='_humanized.mid')
            os.close(fd)
            humanized_midi.write(output_path)
            processed_files.append(output_path)
        except Exception as e:
            return None, None, f"Error processing {path}: {str(e)}"

    if processed_files:
        # Best-effort audio preview rendered from the first humanized MIDI.
        preview_audio = None
        try:
            midi_data = pretty_midi.PrettyMIDI(processed_files[0])
            audio_data = midi_data.synthesize()
            fd, preview_path = tempfile.mkstemp(suffix='_preview.wav')
            os.close(fd)
            sf.write(preview_path, audio_data, 44100)
            preview_audio = preview_path
        except Exception:
            # Preview is optional; MIDI downloads still work without it.
            preview_audio = None

        return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
    else:
        return None, None, "❌ No files were processed successfully."


# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
    gr.Markdown("""
    # đŸŽĩ MP3 HumanizeBot
    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Upload Audio Files")
            file_input = gr.File(
                file_count="multiple",
                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
                label="Upload Audio Files",
                type="filepath"
            )
            conversion_method = gr.Radio(
                ["basic", "melody", "rhythm"],
                value="basic",
                label="đŸŽĩ Conversion Method",
                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
            )
            intensity = gr.Slider(
                0.1, 1.0,
                value=0.7,
                label="đŸŽšī¸ Humanization Intensity"
            )
            style = gr.Radio(
                ["organic", "groovy", "gentle"],
                value="organic",
                label="🎸 Humanization Style"
            )
            process_btn = gr.Button(
                "✨ Convert & Humanize!",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            gr.Markdown("### đŸ“Ĩ Download Results")
            file_output = gr.File(
                file_count="multiple",
                label="Download Humanized MIDI Files"
            )
            audio_output = gr.Audio(
                label="MIDI Audio Preview",
                interactive=False
            )
            status = gr.Textbox(
                label="Status",
                interactive=False,
                max_lines=4
            )

    with gr.Accordion("â„šī¸ How It Works", open=False):
        gr.Markdown("""
        **Process:**
        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
        2. **Convert** to MIDI using pitch and rhythm detection
        3. **Humanize** with timing and velocity variations
        4. **Download** humanized MIDI files

        **Conversion Methods:**
        - **Basic**: General purpose conversion for most music
        - **Melody**: Focuses on extracting melodic content
        - **Rhythm**: Focuses on drum patterns and beats

        **Note**: Audio-to-MIDI conversion is challenging and works best with:
        - Clear melodic lines
        - Good audio quality
        - Not too much reverb/effects
        """)

    process_btn.click(
        fn=process_audio_files,
        inputs=[file_input, intensity, style, conversion_method],
        outputs=[file_output, audio_output, status]
    )

if __name__ == "__main__":
    demo.launch(debug=True)