varox34's picture
Upload 64 files
366b225 verified
# -*- coding: utf-8 -*-
import torch
from torch.nn.utils.rnn import pad_sequence
def kmeans(x, k):
x = torch.tensor(x, dtype=torch.float)
# count the frequency of each datapoint
d, indices, f = x.unique(return_inverse=True, return_counts=True)
# calculate the sum of the values of the same datapoints
total = d * f
# initialize k centroids randomly
c, old = d[torch.randperm(len(d))[:k]], None
# assign labels to each datapoint based on centroids
dists, y = torch.abs_(d.unsqueeze(-1) - c).min(dim=-1)
# make sure number of datapoints is greater than that of clusters
assert len(d) >= k, f"unable to assign {len(d)} datapoints to {k} clusters"
while old is None or not c.equal(old):
# if an empty cluster is encountered,
# choose the farthest datapoint from the biggest cluster
# and move that the empty one
for i in range(k):
if not y.eq(i).any():
mask = y.eq(torch.arange(k).unsqueeze(-1))
lens = mask.sum(dim=-1)
biggest = mask[lens.argmax()].nonzero().view(-1)
farthest = dists[biggest].argmax()
y[biggest[farthest]] = i
mask = y.eq(torch.arange(k).unsqueeze(-1))
# update the centroids
c, old = (total * mask).sum(-1) / (f * mask).sum(-1), c
# re-assign all datapoints to clusters
dists, y = torch.abs_(d.unsqueeze(-1) - c).min(dim=-1)
# assign all datapoints to the new-generated clusters
# without considering the empty ones
y, assigned = y[indices], y.unique().tolist()
# get the centroids of the assigned clusters
centroids = c[assigned].tolist()
# map all values of datapoints to buckets
clusters = [torch.where(y.eq(i))[0].tolist() for i in assigned]
return centroids, clusters
def eisner(scores, mask):
lens = mask.sum(1)
batch_size, seq_len, _ = scores.shape
scores = scores.permute(2, 1, 0)
s_i = torch.full_like(scores, float('-inf'))
s_c = torch.full_like(scores, float('-inf'))
p_i = scores.new_zeros(seq_len, seq_len, batch_size).long()
p_c = scores.new_zeros(seq_len, seq_len, batch_size).long()
s_c.diagonal().fill_(0)
for w in range(1, seq_len):
n = seq_len - w
starts = p_i.new_tensor(range(n)).unsqueeze(0)
# ilr = C(i->r) + C(j->r+1)
ilr = stripe(s_c, n, w) + stripe(s_c, n, w, (w, 1))
# [batch_size, n, w]
ilr = ilr.permute(2, 0, 1)
il = ilr + scores.diagonal(-w).unsqueeze(-1)
# I(j->i) = max(C(i->r) + C(j->r+1) + s(j->i)), i <= r < j
il_span, il_path = il.max(-1)
s_i.diagonal(-w).copy_(il_span)
p_i.diagonal(-w).copy_(il_path + starts)
ir = ilr + scores.diagonal(w).unsqueeze(-1)
# I(i->j) = max(C(i->r) + C(j->r+1) + s(i->j)), i <= r < j
ir_span, ir_path = ir.max(-1)
s_i.diagonal(w).copy_(ir_span)
p_i.diagonal(w).copy_(ir_path + starts)
# C(j->i) = max(C(r->i) + I(j->r)), i <= r < j
cl = stripe(s_c, n, w, (0, 0), 0) + stripe(s_i, n, w, (w, 0))
cl_span, cl_path = cl.permute(2, 0, 1).max(-1)
s_c.diagonal(-w).copy_(cl_span)
p_c.diagonal(-w).copy_(cl_path + starts)
# C(i->j) = max(I(i->r) + C(r->j)), i < r <= j
cr = stripe(s_i, n, w, (0, 1)) + stripe(s_c, n, w, (1, w), 0)
cr_span, cr_path = cr.permute(2, 0, 1).max(-1)
s_c.diagonal(w).copy_(cr_span)
s_c[0, w][lens.ne(w)] = float('-inf')
p_c.diagonal(w).copy_(cr_path + starts + 1)
predicts = []
p_c = p_c.permute(2, 0, 1).cpu()
p_i = p_i.permute(2, 0, 1).cpu()
for i, length in enumerate(lens.tolist()):
heads = p_c.new_ones(length + 1, dtype=torch.long)
backtrack(p_i[i], p_c[i], heads, 0, length, True)
predicts.append(heads.to(mask.device))
return pad_sequence(predicts, True)
def backtrack(p_i, p_c, heads, i, j, complete):
if i == j:
return
if complete:
r = p_c[i, j]
backtrack(p_i, p_c, heads, i, r, False)
backtrack(p_i, p_c, heads, r, j, True)
else:
r, heads[j] = p_i[i, j], i
i, j = sorted((i, j))
backtrack(p_i, p_c, heads, i, r, True)
backtrack(p_i, p_c, heads, j, r + 1, True)
def stripe(x, n, w, offset=(0, 0), dim=1):
r'''Returns a diagonal stripe of the tensor.
Parameters:
x (Tensor): the input tensor with 2 or more dims.
n (int): the length of the stripe.
w (int): the width of the stripe.
offset (tuple): the offset of the first two dims.
dim (int): 0 if returns a horizontal stripe; 1 else.
Example::
>>> x = torch.arange(25).view(5, 5)
>>> x
tensor([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])
>>> stripe(x, 2, 3, (1, 1))
tensor([[ 6, 7, 8],
[12, 13, 14]])
>>> stripe(x, 2, 3, dim=0)
tensor([[ 0, 5, 10],
[ 6, 11, 16]])
'''
x, seq_len = x.contiguous(), x.size(1)
stride, numel = list(x.stride()), x[0, 0].numel()
stride[0] = (seq_len + 1) * numel
stride[1] = (1 if dim == 1 else seq_len) * numel
return x.as_strided(size=(n, w, *x.shape[2:]),
stride=stride,
storage_offset=(offset[0]*seq_len+offset[1])*numel)