Spaces:
Sleeping
Sleeping
| """ | |
| Miscellaneous functions that might be useful for pytorch | |
| """ | |
| import numpy as np | |
| import torch | |
| from torch.autograd import Variable | |
| import os | |
| from itertools import tee | |
| from torch import nn | |
def pairwise(iterable):
    """Return an iterator of overlapping pairs: s -> (s0, s1), (s1, s2), (s2, s3), ..."""
    current, following = tee(iterable)
    # Advance the second copy by one element so that zipping pairs every
    # item with its successor; the None default tolerates an empty iterable.
    next(following, None)
    return zip(current, following)
def get_ranking(predictions, labels, num_guesses=5):
    """
    Given a matrix of predictions and the correct labels, get the rank of the correct
    label per example together with the top guesses.

    :param predictions: [batch_size, range_size] predictions (larger score = better guess)
    :param labels: [batch_size] array of labels (indices into range_size)
    :param num_guesses: Number of guesses to return
    :return: (gt_ranks, guesses). gt_ranks is [batch_size] and holds the 0-based position of
             the correct label in the descending ordering of the predictions; guesses is
             [batch_size, min(num_guesses, range_size)] with the best-scoring indices first.
    """
    assert labels.size(0) == predictions.size(0)
    assert labels.dim() == 1
    assert predictions.dim() == 2

    # A top-k over the whole row is a full descending sort of the class indices.
    _, full_guesses = predictions.topk(predictions.size(1), dim=1)
    # Arg-sorting that permutation inverts it: ranking[b, c] is the position of
    # class c in the sorted guesses of example b.
    _, ranking = full_guesses.topk(full_guesses.size(1), dim=1, largest=False)

    # squeeze(1) rather than squeeze(): a bare squeeze would also drop the batch
    # dimension when batch_size == 1 and hand back a 0-dim tensor.
    gt_ranks = torch.gather(ranking.data, 1, labels.data[:, None]).squeeze(1)

    guesses = full_guesses[:, :num_guesses]
    return gt_ranks, guesses
def nonintersecting_2d_inds(x):
    """
    Returns np.array([(a,b) for a in range(x) for b in range(x) if a != b]) efficiently
    :param x: Size
    :return: a x*(x-1) by 2 array that is [(0,1), (0,2)... (0, x-1), (1,0), (1,2), ..., (x-1, x-2)]
    """
    # True everywhere except on the diagonal, i.e. wherever a != b.
    off_diagonal = ~np.eye(x, dtype=bool)
    first, second = np.nonzero(off_diagonal)
    return np.column_stack((first, second))
def intersect_2d(x1, x2):
    """
    Compare every row of x1 against every row of x2.

    :param x1: [m1, n] numpy array
    :param x2: [m2, n] numpy array
    :return: [m1, m2] bool array whose (i, j) entry is True iff x1[i] equals x2[j] in
             every column
    :raises ValueError: if the two arrays have a different number of columns
    """
    if x1.shape[1] != x2.shape[1]:
        raise ValueError("Input arrays must have same #columns")

    # Shaped like a matrix product ([m1, n, 1] against [1, n, m2]), except that
    # the shared axis is reduced with "all equal" instead of a sum.
    left = np.expand_dims(x1, -1)
    right = np.expand_dims(x2.T, 0)
    return np.all(left == right, axis=1)
def np_to_variable(x, is_cuda=True, dtype=torch.FloatTensor):
    """
    Wrap a numpy array in a torch Variable.

    :param x: numpy array to convert
    :param is_cuda: if True, move the result to the GPU with .cuda()
    :param dtype: torch tensor type the data is cast to
    :return: the Variable holding the converted data
    """
    converted = torch.from_numpy(x).type(dtype)
    wrapped = Variable(converted)
    return wrapped.cuda() if is_cuda else wrapped
def gather_nd(x, index):
    """
    Pick rows of the last dimension of x using multi-dimensional indices.

    :param x: n dimensional tensor [x0, x1, x2, ... x{n-1}, dim]
    :param index: [num, n-1] where each row contains the indices we'll use
    :return: [num, dim]
    """
    lead_dims = x.dim() - 1
    assert lead_dims > 0
    assert index.dim() == 2
    assert index.size(1) == lead_dims
    feature_size = x.size(-1)

    # Turn each index row into a single row-major offset over the leading
    # dimensions. Start from the fastest-varying column (cloned, so the
    # caller's index tensor is left untouched) and walk towards column 0.
    flat = index[:, lead_dims - 1].clone()
    stride = x.size(lead_dims - 1)
    for axis in reversed(range(lead_dims - 1)):  # [n-3, n-4, ..., 1, 0]
        flat += index[:, axis] * stride
        stride *= x.size(axis)

    return x.view(-1, feature_size)[flat]
def enumerate_by_image(im_inds):
    """
    Split a 1-D tensor of image indices into runs of consecutive equal values.

    :param im_inds: 1-D tensor of image indices; a new run starts wherever the value
                    changes (presumably the indices are grouped per image - verify
                    against the caller)
    :return: generator of (image_index, start, end) with image_index an int and
             [start, end) the slice of im_inds covered by that run. Yields nothing
             for an empty input.
    """
    im_inds_np = im_inds.cpu().numpy()
    # Nothing to enumerate; without this guard the [0] lookup below raises IndexError.
    if len(im_inds_np) == 0:
        return

    current_ind = int(im_inds_np[0])
    start = 0
    for pos, val in enumerate(im_inds_np):
        if val != current_ind:
            # The value changed: close the run that just ended and open a new one.
            yield current_ind, start, pos
            current_ind = int(val)
            start = pos
    # The final run is only terminated by the end of the array.
    yield current_ind, start, len(im_inds_np)
def diagonal_inds(tensor):
    """
    Returns the indices required to go along first 2 dims of tensor in diag fashion
    :param tensor: tensor with at least 2 dims whose first two dims have equal size
    :return: long tensor [0, size+1, 2*(size+1), ...] of length size, i.e. the offsets of
             the diagonal entries once the first two dims are flattened together
    """
    assert tensor.dim() >= 2
    assert tensor.size(0) == tensor.size(1)
    n = tensor.size(0)

    # Allocate through tensor.new(...) and fill with 0..n-1 in place.
    steps = tensor.new(n).long()
    torch.arange(0, n, out=steps)

    # Entry (i, i) of an n x n grid sits at flat position i * n + i.
    return steps * (n + 1)
def enumerate_imsize(im_sizes):
    """
    Walk over per-image size records while tracking a running anchor offset.

    :param im_sizes: iterable of (h, w, scale, num_anchors) rows
    :return: generator of (i, start, end, h, w, scale, num_anchors) where [start, end) is
             the range of anchors belonging to image i and num_anchors is cast to int
    """
    offset = 0
    for img_idx, (h, w, scale, num_anchors) in enumerate(im_sizes):
        count = int(num_anchors)
        stop = offset + count
        yield img_idx, offset, stop, h, w, scale, count
        # The next image's anchors start where this one's end.
        offset = stop
def argsort_desc(scores):
    """
    Returns the multi-dimensional indices that sort scores in descending order
    :param scores: Numpy array of arbitrary size
    :return: an array of size [numel(scores), dim(scores)] where each row is the index you'd
             need to get the score.
    """
    # Sort the negated, flattened scores so the largest value comes first.
    flat_order = np.argsort(-scores.ravel())
    # Map each flat position back to its coordinates in the original shape.
    coords = np.unravel_index(flat_order, scores.shape)
    return np.column_stack(coords)
def unravel_index(index, dims):
    """
    Torch counterpart of np.unravel_index for a 1-D tensor of flat (row-major) indices.

    :param index: [num] integer tensor of flat indices (left unmodified)
    :param dims: sequence with the size of each dimension
    :return: [num, len(dims)] tensor whose row i holds the coordinates of index[i]
    """
    unraveled = []
    remaining = index
    # Peel the coordinates off starting with the fastest-varying (last) dimension.
    for d in dims[::-1]:
        unraveled.append(remaining % d)
        # Floor division, out of place: an in-place "/=" on an integer tensor is
        # a true division and is rejected by current PyTorch.
        remaining = remaining // d
    # Coordinates were collected last-dimension-first; restore the natural order.
    return torch.cat([x[:, None] for x in unraveled[::-1]], 1)
def de_chunkize(tensor, chunks):
    """
    Cut tensor into consecutive slices along its first dimension.

    :param tensor: sliceable object (tensor, array, list, ...)
    :param chunks: iterable with the length of each consecutive slice
    :return: generator yielding tensor[start:start+c] for every c in chunks
    """
    offset = 0
    for size in chunks:
        end = offset + size
        yield tensor[offset:end]
        offset = end
def random_choose(tensor, num):
    """
    Randomly choose up to num entries along the first dimension, without replacement.

    :param tensor: tensor to sample from (CPU or CUDA)
    :param num: maximum number of entries to keep
    :return: tensor itself (same object) if it has at most num entries, otherwise a
             contiguous tensor holding num randomly selected entries
    """
    total = tensor.size(0)
    num_choose = min(total, num)
    if num_choose == total:
        return tensor

    # Gotta do this in numpy because of https://github.com/pytorch/pytorch/issues/1868
    rand_idx = np.random.choice(total, size=num_choose, replace=False)
    rand_idx = torch.from_numpy(rand_idx).long()
    # Only move the index to the GPU when the data lives there; calling
    # .cuda(get_device()) unconditionally breaks on CPU tensors.
    if tensor.is_cuda:
        rand_idx = rand_idx.cuda(tensor.get_device())
    return tensor[rand_idx].contiguous()
def transpose_packed_sequence_inds(lengths):
    """
    Goes from a TxB packed sequence to a BxT or vice versa. Assumes that nothing is a variable

    :param lengths: sequence of ints (list, tuple or array) with the size of each
                    consecutive group in the packed data. lengths[0] is used as the
                    maximum, so the sizes are presumably sorted in non-increasing
                    order - verify against the caller.
    :return: (new_inds, new_lens). new_inds is a 1-D numpy array of positions in the
             original packed data, listed in the order of the transposed layout;
             new_lens is a list with the group sizes of the transposed layout.
             Both are empty when there is nothing to transpose.
    """
    # Normalise to a plain list of ints: "[0] + lengths" raises for tuples and
    # silently turns into an element-wise addition for numpy arrays.
    lengths = [int(n) for n in lengths]

    new_inds = []
    new_lens = []
    # cum_add[g] is the position of the next unread element of group g.
    cum_add = np.cumsum([0] + lengths)
    max_len = lengths[0] if lengths else 0
    length_pointer = len(lengths) - 1
    for i in range(max_len):
        # Skip the trailing groups that have no i-th element.
        while length_pointer > 0 and lengths[length_pointer] <= i:
            length_pointer -= 1
        # Take the i-th element of every group that is still active, then advance them.
        new_inds.append(cum_add[:(length_pointer + 1)].copy())
        cum_add[:(length_pointer + 1)] += 1
        new_lens.append(length_pointer + 1)

    if not new_inds:
        # np.concatenate refuses an empty list; return an empty index array instead.
        return cum_add[:0].copy(), new_lens
    new_inds = np.concatenate(new_inds, 0)
    return new_inds, new_lens
def right_shift_packed_sequence_inds(lengths):
    """
    :param lengths: e.g. [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1]
    :return: perm indices for the old stuff (TxB) to shift it right 1 slot so as to accommodate
             BOS toks

    visual example: of lengths = [4,3,1,1]
    before:
        a (0)  b (4)  c (7)  d (8)
        a (1)  b (5)
        a (2)  b (6)
        a (3)

    after:
        bos  a (0)  b (4)  c (7)
        bos  a (1)
        bos  a (2)
        bos
    """
    shifted = []
    offset = 0
    # Each group inherits the leading entries of the group before it: as many
    # as the later group has slots.
    for prev_len, next_len in zip(lengths[:-1], lengths[1:]):
        shifted.extend(range(offset, offset + next_len))
        offset += prev_len
    return shifted
def clip_grad_norm(named_parameters, max_norm, clip=False, verbose=False):
    r"""Computes, and optionally clips, the gradient norm of an iterable of named parameters.

    The norm is computed over all gradients together, as if they were
    concatenated into a single vector. When clipping is enabled the gradients
    are modified in-place.

    Arguments:
        named_parameters (Iterable[Tuple[str, Variable]]): (name, parameter) pairs,
            e.g. ``model.named_parameters()``; parameters without a gradient are skipped
        max_norm (float or int): max norm of the gradients
        clip (bool): if True, rescale the gradients whenever their total norm
            exceeds ``max_norm``; if False the norm is only measured
        verbose (bool): if True, print the total norm, the clip coefficient and
            the per-parameter norms (largest first)

    Returns:
        Total norm of the gradients (viewed as a single vector).
    """
    # The pairs are walked twice (measure, then rescale). A generator such as
    # model.named_parameters() would already be exhausted for the second pass
    # and the clipping would silently do nothing, so materialise it first.
    named_parameters = list(named_parameters)

    max_norm = float(max_norm)
    total_norm = 0
    param_to_norm = {}
    param_to_shape = {}
    for n, p in named_parameters:
        if p.grad is not None:
            param_norm = p.grad.data.norm(2)
            total_norm += param_norm ** 2
            param_to_norm[n] = param_norm
            param_to_shape[n] = p.size()

    total_norm = total_norm ** (1. / 2)
    # The epsilon keeps the division finite when every gradient is zero.
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1 and clip:
        for _, p in named_parameters:
            if p.grad is not None:
                p.grad.data.mul_(clip_coef)

    if verbose:
        print('---Total norm {:.3f} clip coef {:.3f}-----------------'.format(total_norm, clip_coef))
        for name, norm in sorted(param_to_norm.items(), key=lambda x: -x[1]):
            print("{:<50s}: {:.3f}, ({})".format(name, norm, param_to_shape[name]))
        print('-------------------------------', flush=True)

    return total_norm
def update_lr(optimizer, lr=1e-4):
    """
    Set the learning rate of every parameter group of an optimizer.

    :param optimizer: object exposing ``param_groups``, a list of dicts with an 'lr' entry
    :param lr: the new learning rate
    """
    print("------ Learning rate -> {}".format(lr))
    for group in optimizer.param_groups:
        group.update(lr=lr)