#!/usr/bin/env python
"""
Inference script for ResNet50 trained on ImageNet-1K.
"""
# Standard Library Imports
import numpy as np
import torch
from collections import OrderedDict

# Third Party Imports
from torchvision import transforms
from torch.nn import functional as F
from torchvision.models import resnet50
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


def inference(image, alpha, top_k, target_layer, model=None, classes=None):
    """
    Run classification + GradCAM visualization on a single input image.

    :param image: HxWx3 uint8 RGB image array provided by the user
    :param alpha: Weight of the original image when blending with the CAM
                  (passed to ``show_cam_on_image`` as ``image_weight``)
    :param top_k: Number of top predictions to return
    :param target_layer: Layer index (1-6) for which GradCAM is computed;
                         values outside the range are clamped
    :param model: Torchvision-style ResNet50 to use for inference
    :param classes: Sequence of class names, aligned with model output indices
    :return: Tuple of (OrderedDict of top-k {class_name: confidence},
             CAM-overlaid visualization image)
    """
    # Keep an unmodified copy for the final CAM overlay
    org_img = image.copy()

    # Per-channel mean of the input image, scaled to [0, 1]
    # NOTE(review): normalizing with per-image statistics rather than the
    # standard ImageNet mean/std (0.485, 0.456, 0.406)/(0.229, 0.224, 0.225)
    # is unusual for an ImageNet-trained model — confirm this is intentional.
    mean_r = np.mean(image[:, :, 0] / 255.)
    mean_g = np.mean(image[:, :, 1] / 255.)
    mean_b = np.mean(image[:, :, 2] / 255.)

    # Per-channel standard deviation, scaled to [0, 1]
    std_r = np.std(image[:, :, 0] / 255.)
    std_g = np.std(image[:, :, 1] / 255.)
    std_b = np.std(image[:, :, 2] / 255.)

    # Convert image to a normalized tensor
    _transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
    ])

    # Preprocess and create a mini-batch as expected by the model
    input_tensor = _transform(image).unsqueeze(0)

    # Move the input and model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_tensor = input_tensor.to(device)
    model.to(device)

    # BUGFIX: eval mode must be set BEFORE the forward pass, otherwise
    # BatchNorm/Dropout layers run in training mode during prediction.
    # (Originally this was called after GradCAM generation.)
    model.eval()

    # Get model predictions (no gradients needed for the class scores;
    # GradCAM below performs its own forward/backward passes)
    with torch.no_grad():
        outputs = model(input_tensor)
        probabilities = torch.softmax(outputs, dim=1)[0]

    # Generalized from a hard-coded 1000 so any class list works
    confidences = {classes[i]: float(probabilities[i])
                   for i in range(len(classes))}

    # Select the top-k classes by confidence
    sorted_confidences = sorted(confidences.items(),
                                key=lambda val: val[1],
                                reverse=True)
    show_confidences = OrderedDict(sorted_confidences[:top_k])

    # Map layer numbers to meaningful parts of the ResNet architecture
    _layers = {
        1: model.conv1,      # Initial convolution layer
        2: model.layer1[-1], # Last bottleneck of first residual block
        3: model.layer2[-1], # Last bottleneck of second residual block
        4: model.layer3[-1], # Last bottleneck of third residual block
        5: model.layer4[-1], # Last bottleneck of fourth residual block
        6: model.layer4[-1]  # fc has no spatial map; reuse last conv layer
    }

    # Clamp the user-supplied layer selection into the valid range
    target_layer = min(max(target_layer, 1), 6)
    target_layers = [_layers[target_layer]]

    # BUGFIX: take the top class index directly from the probabilities.
    # The original did classes.index(name), which is O(n) and returns the
    # wrong index if two classes share a display name.
    class_idx = int(torch.argmax(probabilities))

    # Compute the class activation map for the top predicted class
    cam = GradCAM(model=model, target_layers=target_layers)
    grayscale_cam = cam(input_tensor=input_tensor,
                        targets=[ClassifierOutputTarget(class_idx)],
                        aug_smooth=True,
                        eigen_smooth=True)
    grayscale_cam = grayscale_cam[0, :]

    # Overlay the input image with the class activations
    visualization = show_cam_on_image(org_img / 255.,
                                      grayscale_cam,
                                      use_rgb=True,
                                      image_weight=alpha)

    return show_confidences, visualization