"""Audio-to-image Gradio app.

Extracts simple loudness/brightness features from an uploaded audio clip,
maps them to a short text prompt, and renders an image with Stable Diffusion.
"""

import os
import sys

import gradio as gr  # NOTE: was imported twice in the original; once is enough
import librosa
import numpy as np
import torch
from diffusers import StableDiffusionPipeline

# Debug output: the script was evidently troubleshooting a Gradio install,
# so keep the version/location diagnostics.
print(f"Gradio version: {gr.__version__}")
print(f"Gradio location: {gr.__file__}")
print(f"Python executable: {sys.executable}")

# Ensure that the script uses CUDA if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Stable Diffusion model. float16 halves VRAM on GPU; CPU needs float32.
model_id = "runwayml/stable-diffusion-v1-5"  # Updated model ID for better accessibility
try:
    stable_diffusion = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)
except Exception as e:
    print(f"Error loading the model: {e}")
    print("Ensure you have the correct model ID and access rights.")
    sys.exit(1)  # sys.exit instead of bare exit(): exit() is for the REPL only


def describe_audio(audio_path):
    """
    Generate a textual description based on audio features.

    Parameters:
        audio_path (str): Path to the audio file.

    Returns:
        str: Generated description (falls back to a generic scene on error).
    """
    try:
        # Load the audio file at its native sampling rate
        y, sr = librosa.load(audio_path, sr=None)

        # Extract Mel Spectrogram (power), then convert to decibels
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        db_spec = librosa.power_to_db(S, ref=np.max)

        # Calculate average amplitude (dB, <= 0) and average spectral centroid (Hz)
        avg_amplitude = np.mean(db_spec)
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)
        avg_frequency = np.mean(spectral_centroids)

        # Generate description based on amplitude: quieter -> calmer imagery
        if avg_amplitude < -40:
            amplitude_desc = "a calm and serene landscape with gentle waves"
        elif avg_amplitude < -20:
            amplitude_desc = "a vibrant forest with rustling leaves"
        else:
            amplitude_desc = "a thunderstorm with dark clouds and lightning"

        # Generate description based on frequency: brighter sound -> livelier scene
        if avg_frequency < 2000:
            frequency_desc = "under soft, ambient light"
        elif avg_frequency < 4000:
            frequency_desc = "with vivid and lively colors"
        else:
            frequency_desc = "in a surreal and dynamic setting"

        # Combine descriptions into one prompt
        return f"{amplitude_desc} {frequency_desc}"
    except Exception as e:
        print(f"Error processing audio: {e}")
        return "an abstract artistic scene"


def generate_image(description):
    """
    Generate an image using the Stable Diffusion model based on the description.

    Parameters:
        description (str): Textual description for image generation.

    Returns:
        PIL.Image or None: Generated image, or None if generation failed.
    """
    try:
        if device == "cuda":
            # autocast keeps the fp16 pipeline numerically stable on GPU
            with torch.autocast("cuda"):
                image = stable_diffusion(description).images[0]
        else:
            image = stable_diffusion(description).images[0]
        return image
    except Exception as e:
        print(f"Error generating image: {e}")
        return None


def audio_to_image(audio_file):
    """
    Convert an audio file to an artistic image.

    Parameters:
        audio_file (str): Path to the uploaded audio file.

    Returns:
        PIL.Image or str: Generated image, or an error message string.
    """
    if audio_file is None:
        return "No audio file provided."

    description = describe_audio(audio_file)
    print(f"Generated Description: {description}")

    image = generate_image(description)
    if image is not None:
        return image
    return "Failed to generate image."


# Gradio Interface
title = "🎵 Audio to Artistic Image Converter 🎨"
description_text = """
Upload an audio file, and this app will generate an artistic image based on the sound's characteristics.
"""

# Define example paths
example_paths = [
    "example_audio/calm_ocean.wav",
    "example_audio/rustling_leaves.wav",
    "example_audio/thunderstorm.wav",
]

# Verify example files exist; only ship examples that are actually on disk
valid_examples = []
for path in example_paths:
    if os.path.isfile(path):
        valid_examples.append([path])
    else:
        print(f"Example file not found: {path}")

if not os.path.exists("example_audio"):
    os.makedirs("example_audio")
    print("Please add some example audio files in the 'example_audio' directory.")

# NOTE(review): `source=` and `allow_flagging=` are Gradio 3.x keywords; Gradio 4.x
# renamed them to `sources=[...]` and `flagging_mode`. Confirm against the
# installed version printed at startup before upgrading.
interface = gr.Interface(
    fn=audio_to_image,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Image(type="pil"),
    title=title,
    description=description_text,
    examples=valid_examples if valid_examples else None,
    allow_flagging="never",
    theme="default",
)

if __name__ == "__main__":
    interface.launch()