import numpy as np
import torch
import torch.nn as nn

# Utility functions for Faster R-CNN layers

class Config:
    """
    Container for the settings of the detection pipeline.

    Every setting is a plain class attribute, grouped below by the stage that
    the section comments name (anchors, anchor target layer, proposal layer,
    proposal target layer, ROI pooling, inference, training).
    """

    verbose = False
    save_dir = './models'
    conv_net_path = 'checkpoints/vgg16_bn-6c64b313.pth'
    backbone = 'vgg'
    pretrain_conv_net = True
    device = 'cuda'
    classification = False
    
    # General params
    num_classes = 1
    input_channels = 1
    img_width = 1024
    img_height = 256
    use_biophonia = True
    fpn = True
    fpn_rpn = False
    fpn_p_channels = 256
    fpn_o_channels = 256
    normalize_input = False
    noise_strength = 0
    self_attention = False
    encode_frequency = False
    position_encoding = False
    transform = False

    # Anchors
    anchor_stride = 16
    base_size = 16
    ratios = [0.5, 1, 2]
    scales_factor_low = 0
    scales_factor_high = 4
    # Powers of two from 2**scales_factor_low up to 2**(scales_factor_high - 1)
    scales = 2**np.arange(scales_factor_low, scales_factor_high)
    # One anchor per (ratio, scale) combination
    n_anchors = len(ratios) * len(scales)
    
    # Anchor Target Layer
    rpn_neg_label = 0.3
    rpn_pos_label = 0.7 # maybe lower this a bit
    rpn_batchsize = 16 # tune this; try reducing it drastically?
    rpn_fg_fraction = 0.5
    
    # Proposal Layer
    pre_nms_topN = 3000 # tune this
    min_threshold = 5 # minimum proposal size in px
    nms_thresh = 0.7
    post_nms_topN = 1000 # tune this
    post_nms_topN_eval = 50
    pre_nms_topN_eval = 500
    
    # Proposal Target Layer
    rcnn_batch_size = 16 # tune this
    rcnn_fg_prop = 0.4 # 0.25 in the original paper; try different values
    fg_threshold = 0.5
    bg_threshold_lo = 0.1
    bg_threshold_hi = 0.5
    
    # ROI Pooling
    roi_pool_h = 2 # to be changed (3? 4?)
    roi_pool_w = 2 # to be changed (3? 4?)
    hidden_size = 4096
    top_pyramid_roi_size = 128
    rcnn_attention = False
    
    # Inference
    proposal_number = 50 # number of proposals per class after last nms
    
    # Training
    lambda_reg_rpn_loss = 1.0 # try a few other values
    lambda_reg_rcnn_loss = 1.0 # try a few other values
    batch_size = 2
    val_size = 20
    learning_rate = 1e-4 # tune this
    validation_prop = 0.01
    n_epochs = 10
    save_every = 10
    scheduler_gamma = 0.1
    scheduler_milestones = [15, 25]
    cv_idx = -1


def generate_anchors(base_size, ratios, scales):
    """
    Builds the set of reference anchors for a single location.

    One anchor is produced for every (scale, ratio) pair. Rows are ordered
    scale-major: all ratios for the first scale, then all ratios for the
    second scale, and so on.

    Args:
        base_size: side length of the square base anchor.
        ratios: array-like of ratios; a ratio r multiplies the base width by
            sqrt(r) and the base height by 1 / sqrt(r).
        scales: array-like of factors applied to both width and height.

    Returns:
        Integer array of shape (len(scales) * len(ratios), 4) with columns
        x1, y1, x2, y2, centred on int(base_size / 2).
    """

    # Coerce to arrays so plain lists / tuples work for both arguments
    # (the fancy indexing below would otherwise fail on a list of scales).
    ratios = np.asarray(ratios)
    scales = np.asarray(scales)

    base_anchor_wh = np.array([base_size, base_size])

    # Deform base anchor dimensions to the given ratios
    sqrt_ratios = np.sqrt(ratios)
    coeffs = np.hstack([sqrt_ratios[:, np.newaxis], (1 / sqrt_ratios)[:, np.newaxis]])
    ratios_anchors_wh = coeffs * np.sqrt(np.prod(base_anchor_wh))

    # Expand the resulting anchor dimensions to the given sizes
    all_anchor_whs = (ratios_anchors_wh.flatten() * scales[:, np.newaxis]).reshape(-1, 2)

    # Convert from w h to x1 y1 x2 y2 representation, given center coordinates at int(base_size / 2)
    half_whs = all_anchor_whs / 2
    all_anchor = (np.hstack([-half_whs, half_whs]) + int(base_size / 2)).astype(int)

    return all_anchor


def get_anchor_shifts(width, height, anchor_stride):
    """
    Returns the (x, y, x, y) offsets of every cell of a width x height grid.

    Offsets are multiples of anchor_stride. Rows are ordered with x varying
    fastest. The result has shape (width * height, 1, 4).
    """

    xs = np.arange(0, width) * anchor_stride
    ys = np.arange(0, height) * anchor_stride

    # grid_x[i, j] == xs[j] and grid_y[i, j] == ys[i]; flattening row by row
    # therefore cycles through xs for each successive value of ys.
    grid_x, grid_y = np.meshgrid(xs, ys)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()

    # The (x, y) pair is duplicated so it can offset both box corners
    shifts = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)

    return shifts.reshape(-1, 1, 4)


def bbox_overlap(anchors, bbox):
    """
    Computes a K (anchors) x N (bbox) intersection over union matrix.

    Both inputs are indexed as 2-D tensors whose first four columns are
    x1, y1, x2, y2. Coordinates are treated as inclusive, hence the +1 in
    every width / height computation.
    """

    # Anchor coordinates as K x 1 columns, bbox coordinates as 1 x N rows,
    # so that every binary operation below broadcasts to K x N.
    a_x1, a_y1, a_x2, a_y2 = (anchors[:, col].unsqueeze(1) for col in range(4))
    b_x1, b_y1, b_x2, b_y2 = (bbox[:, col].unsqueeze(0) for col in range(4))

    # Side lengths of the intersection rectangle (0 when the boxes are disjoint)
    inter_w = (torch.min(a_x2, b_x2) - torch.max(a_x1, b_x1) + 1).clamp(min=0)
    inter_h = (torch.min(a_y2, b_y2) - torch.max(a_y1, b_y1) + 1).clamp(min=0)
    inter_area = inter_w * inter_h

    anchor_area = (a_x2 - a_x1 + 1) * (a_y2 - a_y1 + 1)
    bbox_area = (b_x2 - b_x1 + 1) * (b_y2 - b_y1 + 1)
    union_area = anchor_area + bbox_area - inter_area

    return inter_area / union_area


def bbox_transform(anchors, bbox):
    """
    Encodes boxes as regression targets relative to anchors.

    Row i of bbox is encoded against row i of anchors. Both are indexed as
    (n, 4) tensors of x1, y1, x2, y2. The result is an (n, 4) tensor of
    (t_x, t_y, t_w, t_h): centre offsets normalised by the anchor size and
    log ratios of the sizes.
    """

    def to_center_form(boxes):
        # Inclusive pixel coordinates, hence the +1 on width and height
        width = (boxes[:, 2] - boxes[:, 0]) + 1
        height = (boxes[:, 3] - boxes[:, 1]) + 1
        center_x = boxes[:, 0] + 0.5 * width
        center_y = boxes[:, 1] + 0.5 * height
        return center_x, center_y, width, height

    anchor_cx, anchor_cy, anchor_w, anchor_h = to_center_form(anchors)
    box_cx, box_cy, box_w, box_h = to_center_form(bbox)

    targets = [
        (box_cx - anchor_cx) / anchor_w,
        (box_cy - anchor_cy) / anchor_h,
        torch.log(box_w / anchor_w),
        torch.log(box_h / anchor_h),
    ]

    return torch.stack(targets, dim=1)


def weight_init(m):
    """
    Initialises the weight of module m according to its class name.

    - name contains 'BatchNorm': weight drawn from N(0, 0.02)
    - name contains 'Linear' but not 'LinearLayer': Kaiming normal
    - name contains 'Conv2d': Kaiming normal

    The three checks are independent; modules matching none are left untouched.
    """
    name = type(m).__name__

    is_batchnorm = 'BatchNorm' in name
    is_plain_linear = 'Linear' in name and 'LinearLayer' not in name
    is_conv2d = 'Conv2d' in name

    if is_batchnorm:
        m.weight.data.normal_(0.0, 0.02)
    if is_plain_linear:
        nn.init.kaiming_normal_(m.weight)
    if is_conv2d:
        nn.init.kaiming_normal_(m.weight)


def collate_fn(list_batch):
    """
    Merges a list of (img, bb_coord, bird_id, img_info) samples into a batch.

    Returns a list [img_batch, bb_coord_batch, lengths, bird_ids, img_infos]:
    images are stacked along a new first dimension, box coordinates and ids
    are concatenated along dim 0, lengths holds the number of box rows of
    each sample, and img_infos is the list of per-sample info objects.
    """
    images, boxes, ids, img_infos = [], [], [], []
    for img, bb_coord, bird_id, img_info in list_batch:
        images.append(img)
        boxes.append(bb_coord)
        ids.append(bird_id)
        img_infos.append(img_info)

    # Needed downstream to split the concatenated boxes back per image
    lengths = [sample_boxes.size(0) for sample_boxes in boxes]

    img_batch = torch.stack(images)
    bb_coord_batch = torch.cat(boxes, dim=0)
    bird_ids = torch.cat(ids)

    return [img_batch, bb_coord_batch, lengths, bird_ids, img_infos]

        
def bbox_reg_to_coord(bbox_pred, anchors):
    """
    Decodes regression outputs (t_x, t_y, t_w, t_h) into rounded corner boxes.

    anchors is indexed as an (n, 4) tensor of x1, y1, x2, y2; the last axis
    of bbox_pred holds the four regression values. The decoded coordinates
    are stacked along dim 2, so bbox_pred is presumably (batch, n, 4) -
    confirm against the callers.
    """

    # Anchors in centre / size form (inclusive pixel coordinates, hence +1)
    anchor_w = (anchors[:, 2] - anchors[:, 0]) + 1
    anchor_h = (anchors[:, 3] - anchors[:, 1]) + 1
    anchor_cx = anchors[:, 0] + 0.5 * anchor_w
    anchor_cy = anchors[:, 1] + 0.5 * anchor_h

    delta_x, delta_y, delta_w, delta_h = (bbox_pred[..., col] for col in range(4))

    # Invert the encoding: shift the centre, rescale the size exponentially
    center_x = (delta_x * anchor_w) + anchor_cx
    center_y = (delta_y * anchor_h) + anchor_cy
    half_w = 0.5 * (torch.exp(delta_w) * anchor_w)
    half_h = 0.5 * (torch.exp(delta_h) * anchor_h)

    corners = [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h]

    return torch.stack([torch.round(corner) for corner in corners], dim=2)


def batch_self_overlap(bbox_pred):
    """
    Computes, for every batch element, the IoU of each box with every other box.

    Args:
        bbox_pred: tensor of shape (batch, n_boxes, 4) holding x1, y1, x2, y2
            with inclusive coordinates (hence the +1 in the size computations).

    Returns:
        Tensor of shape (batch, n_boxes, n_boxes); entry [b, i, j] is the IoU
        of boxes i and j of batch element b. An input without boxes yields an
        empty (batch, 0, 0) tensor.
    """

    x1, y1, x2, y2 = (bbox_pred[..., col] for col in range(4))

    def pairwise(op, values):
        # Broadcast (batch, 1, n) against (batch, n, 1) to cover all pairs
        # without materialising repeated copies of the coordinates.
        return op(values.unsqueeze(1), values.unsqueeze(2))

    x_intersec = (pairwise(torch.min, x2) - pairwise(torch.max, x1) + 1).clamp(min=0)
    y_intersec = (pairwise(torch.min, y2) - pairwise(torch.max, y1) + 1).clamp(min=0)
    intersection = x_intersec * y_intersec

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    union = areas.unsqueeze(1) + areas.unsqueeze(2) - intersection

    return intersection / union


def nms(bbox_pred, scores, nms_thresh=0.7, post_nms_topN=300, return_idx=False):
    """
    Applies non maximum suppression to the predicted bbox coordinates.

    The boxes of each batch element are expected to be sorted by decreasing
    score already: a box is kept unless an earlier kept box overlaps it with
    an IoU >= nms_thresh.

    Args:
        bbox_pred: tensor of shape (batch_size, n_boxes, 4).
        scores: tensor of shape (batch_size, n_boxes), sorted in decreasing
            order along dim 1, with bbox_pred sorted accordingly.
        nms_thresh: IoU at or above which a later box is suppressed.
        post_nms_topN: maximum number of boxes kept per batch element.
        return_idx: when True, also return the kept indices.

    Returns:
        (bbox_pred, scores) restricted to the kept boxes. So that the results
        can be stacked, every batch element is truncated to the same length:
        the smaller of post_nms_topN and the shortest keep list in the batch.
        With return_idx=True a third item is appended: the list of kept index
        lists, one per batch element, NOT truncated to that common length.
    """

    iou = batch_self_overlap(bbox_pred)

    # Threshold once and move the boolean result to the host in a single
    # transfer, instead of synchronising with the device for every kept box.
    overlaps = (iou >= nms_thresh).cpu().numpy()

    batch_size = len(bbox_pred)
    batch_keep = []

    for b_overlaps in overlaps:

        n_boxes = len(b_overlaps)
        # Boolean mask gives O(1) membership tests (a growing list would be
        # rescanned for every box and accumulate duplicates).
        suppressed = np.zeros(n_boxes, dtype=bool)
        keep_idx = []

        for idx in range(n_boxes):
            if suppressed[idx]:
                continue
            keep_idx.append(idx)
            # Only lower-ranked boxes (idx + 1 onwards) can be suppressed by idx
            suppressed[idx + 1:] |= b_overlaps[idx, idx + 1:]

        batch_keep.append(keep_idx)

    # Truncate idx vectors if one has length < post nms topN
    post_nms_topN = min(min(len(b_keep) for b_keep in batch_keep), post_nms_topN)
    scores = torch.stack([scores[i, batch_keep[i][:post_nms_topN]] for i in range(batch_size)])
    bbox_pred = torch.stack([bbox_pred[i, batch_keep[i][:post_nms_topN], :] for i in range(batch_size)])

    out = bbox_pred, scores

    if return_idx:
        out += (batch_keep,)

    return out


def get_bbox_regression_targets(bbox_targets, b_labels, num_classes):
    """
    One regression objective per object class.

    Expands the (n, 4) regression targets into an (n, 4 * (1 + num_classes))
    tensor. For a row labelled with class i (1 <= i <= num_classes), the four
    targets are written to columns 4 * i .. 4 * i + 3; all other entries,
    including whole rows labelled 0 (background), stay zero.

    Args:
        bbox_targets: tensor of shape (n, 4).
        b_labels: tensor of n class indices (0 for background).
        num_classes: number of object classes, background excluded.

    Returns:
        The expanded targets, allocated on the same device as bbox_targets.
    """

    # Allocate next to the inputs rather than unconditionally on CUDA, so the
    # function also works on CPU-only machines and never mixes devices.
    expanded_bbox_targets = torch.zeros(
        len(bbox_targets), 4 * (1 + num_classes), device=bbox_targets.device
    )

    for i in range(1, num_classes + 1):
        class_idx = torch.nonzero(b_labels == i)[:, 0]
        col_idx = 4 * i
        expanded_bbox_targets[class_idx, col_idx:col_idx + 4] = bbox_targets[class_idx]

    return expanded_bbox_targets


def cross_entropy_loss(bbox_classes, labels):
    """
    Sums the negative log of the value stored at each row's ground-truth class.

    labels must be a flatten (numpy) array of class indices (0 for background),
    one per row of bbox_classes. bbox_classes is used as is: no softmax is
    applied here.
    """

    # Pick, for every row, the entry in the column given by its label
    row_idx = range(len(bbox_classes))
    picked = bbox_classes[row_idx, labels]

    return torch.log(picked).neg().sum()


def smooth_l1_loss(bbox_reg, bbox_targets):
    """
    Element-wise smooth L1 loss between predictions and targets.

    With d = |bbox_reg - bbox_targets|, the loss is 0.5 * d**2 where d < 1
    and d - 0.5 where d >= 1. No reduction is applied.
    """

    abs_diff = (bbox_reg - bbox_targets).abs()

    # 0/1 weights selecting the linear (d >= 1) or quadratic (d < 1) branch
    in_linear_zone = abs_diff >= 1
    linear_weight = in_linear_zone.float()
    quadratic_weight = (~in_linear_zone).float()

    quadratic_term = quadratic_weight * 0.5 * (abs_diff ** 2)
    linear_term = linear_weight * (abs_diff - 0.5)

    return quadratic_term + linear_term


def bool_parser(string):
    """
    Parses a string into a boolean.

    Only 'false' (in any letter case) maps to False; every other string,
    including '0', 'no' and the empty string, maps to True.
    """
    return string.lower() != 'false'


def train_test_split(length, val_prop):
    """
    Randomly splits the indices 0 .. length - 1 into two disjoint arrays.

    The indices are shuffled with NumPy's global random state. The first
    int(val_prop * length) shuffled indices form the validation part, the
    remainder the training part. Returns (train_indices, val_indices).
    """
    shuffled = np.arange(length)
    np.random.shuffle(shuffled)

    n_val = int(val_prop * length)
    val_indices = shuffled[:n_val]
    train_indices = shuffled[n_val:]

    return train_indices, val_indices

def position_encodings(x, device):
    """
    Builds sinusoidal position encodings matching the spatial size of x.

    x must be a 4-D tensor (bs, channels, height, width). With
    n = int(channels / 4), the returned float tensor has shape
    (bs, 4 * n, height, width) and its channels are laid out as:
        [0, n)    sine of the column position, one channel per frequency k
        [n, 2n)   cosine of the column position
        [2n, 3n)  sine of the row position
        [3n, 4n)  cosine of the row position
    The angle for frequency k is pos * 128 / (size * 1e4 ** (2 * k / channels)),
    where size is width for columns and height for rows. The encoding is the
    same for every batch element and is created on the given device.
    """
    bs, channels, height, width = x.shape
    n_freq = int(channels / 4)

    # Pixel positions pre-multiplied by the constant factor of the angle
    col_phase = np.arange(width) * 128
    row_phase = np.arange(height) * 128

    # One denominator per frequency index k
    denominators = [1e4 ** (2 * k / channels) for k in range(n_freq)]
    col_angles = [col_phase / (width * denom) for denom in denominators]
    row_angles = [row_phase / (height * denom) for denom in denominators]

    planes = []
    # Column encodings are constant along the rows: repeat the vector height times
    for trig in (np.sin, np.cos):
        planes.extend(np.tile(trig(angle), (height, 1)) for angle in col_angles)
    # Row encodings are constant along the columns: repeat the column vector width times
    for trig in (np.sin, np.cos):
        planes.extend(np.tile(trig(angle)[:, np.newaxis], (1, width)) for angle in row_angles)

    encoding = torch.from_numpy(np.stack(planes)).to(device)

    return encoding.unsqueeze(0).repeat(bs, 1, 1, 1).float()