import gradio as gr
import spaces  # Enables ZeroGPU on Hugging Face
import torch
from transformers import AutoModelForCausalLM

from anticipation import ops
from anticipation.sample import generate
from anticipation.convert import events_to_midi, midi_to_events
from anticipation.tokenize import extract_instruments

from pyharp import *

# === Model Choices ===
SMALL_MODEL = "stanford-crfm/music-small-800k"
MEDIUM_MODEL = "stanford-crfm/music-medium-800k"
LARGE_MODEL = "stanford-crfm/music-large-800k"

# === Model Card ===
model_card = ModelCard(
    name="Anticipatory Music Transformer",
    description=(
        "Uses the Anticipatory Music Transformer (AMT) to generate "
        "accompaniment for a given MIDI file with a selected melody."
    ),
    author="John Thickstun, David Hall, Chris Donahue, Percy Liang",
    tags=["midi", "generation", "accompaniment"],
    midi_in=True,
    midi_out=True,
)

# Cache loaded models so each checkpoint is only loaded once per worker.
model_cache = {}


def load_amt_model(model_choice):
    """Loads and caches the AMT model inside the worker process."""
    if model_choice in model_cache:
        return model_cache[model_choice]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AutoModelForCausalLM.from_pretrained(model_choice).to(device)
    model_cache[model_choice] = model
    return model


@spaces.GPU
def generate_accompaniment(midi_file, model_choice, selected_midi_program, history_length):
    """Generates accompaniment for the entire MIDI input, conditioned on the
    user-selected history length."""
    model = load_amt_model(model_choice)

    events = midi_to_events(midi_file.name)
    total_time = round(ops.max_time(events, seconds=True))

    # Extract the melody line using the selected MIDI program number.
    events, melody = extract_instruments(events, [selected_midi_program])
    if not melody:
        return None, "⚠️ Please select a valid MIDI program that contains events."

    # Keep the first `history_length` seconds of the remaining events as context.
    history = ops.clip(events, 0, history_length, clip_duration=False)

    # Generate accompaniment for the remaining duration.
    accompaniment = generate(
        model,
        history_length,  # Start generating after the user-defined history length
        total_time,      # Generate through the full remaining duration
        inputs=history,
        controls=melody,
        top_p=0.95,
        debug=False,
    )

    # Combine the generated accompaniment with the melody.
    output_events = ops.clip(
        ops.combine(accompaniment, melody), 0, total_time, clip_duration=True
    )

    # Convert back to MIDI and save.
    output_midi = "generated_accompaniment_huggingface.mid"
    mid = events_to_midi(output_events)
    mid.save(output_midi)
    return output_midi, None


def process_fn(input_midi, model_choice, selected_midi_program, history_length):
    """Processes the input and runs AMT to generate accompaniment for the full MIDI file."""
    output_midi, error_message = generate_accompaniment(
        input_midi, model_choice, selected_midi_program, history_length
    )
    if error_message:
        return None, {"message": error_message}

    output_labels = LabelList()
    return output_midi, output_labels


# === Build the HARP Gradio Endpoint ===
with gr.Blocks() as demo:
    components = [
        gr.Dropdown(
            choices=[SMALL_MODEL, MEDIUM_MODEL, LARGE_MODEL],
            value=MEDIUM_MODEL,
            label="Select AMT Model (Faster vs. Higher Quality)",
        ),
        gr.Slider(0, 127, step=1, value=1,
                  label="Select Melody Instrument (MIDI Program Number)"),
        gr.Slider(1, 10, step=1, value=5,
                  label="Select History Length (seconds)"),
    ]

    # Wrap the components and processing function in a PyHARP endpoint.
    app = build_endpoint(
        model_card=model_card,
        components=components,
        process_fn=process_fn,
    )

# Launch the PyHARP app.
demo.launch(share=True, show_error=True, debug=True)