| | |
| | |
| | |
| | |
| | |
| | |
| |
|
"""
Semantic Relatedness (supervised) with Pytorch
"""
| | from __future__ import absolute_import, division, unicode_literals |
| |
|
| | import copy |
| | import numpy as np |
| |
|
| | import torch |
| | from torch import nn |
| | import torch.optim as optim |
| |
|
| | from scipy.stats import pearsonr, spearmanr |
| |
|
| |
|
| | class RelatednessPytorch(object): |
| | |
| | def __init__(self, train, valid, test, devscores, config): |
| | |
| | np.random.seed(config['seed']) |
| | torch.manual_seed(config['seed']) |
| | assert torch.cuda.is_available(), 'torch.cuda required for Relatedness' |
| | torch.cuda.manual_seed(config['seed']) |
| |
|
| | self.train = train |
| | self.valid = valid |
| | self.test = test |
| | self.devscores = devscores |
| |
|
| | self.inputdim = train['X'].shape[1] |
| | self.nclasses = config['nclasses'] |
| | self.seed = config['seed'] |
| | self.l2reg = 0. |
| | self.batch_size = 64 |
| | self.maxepoch = 1000 |
| | self.early_stop = True |
| |
|
| | self.model = nn.Sequential( |
| | nn.Linear(self.inputdim, self.nclasses), |
| | nn.Softmax(dim=-1), |
| | ) |
| | self.loss_fn = nn.MSELoss() |
| |
|
| | if torch.cuda.is_available(): |
| | self.model = self.model.cuda() |
| | self.loss_fn = self.loss_fn.cuda() |
| |
|
| | self.loss_fn.size_average = False |
| | self.optimizer = optim.Adam(self.model.parameters(), |
| | weight_decay=self.l2reg) |
| |
|
| | def prepare_data(self, trainX, trainy, devX, devy, testX, testy): |
| | |
| | trainX = torch.from_numpy(trainX).float().cuda() |
| | trainy = torch.from_numpy(trainy).float().cuda() |
| | devX = torch.from_numpy(devX).float().cuda() |
| | devy = torch.from_numpy(devy).float().cuda() |
| | testX = torch.from_numpy(testX).float().cuda() |
| | testY = torch.from_numpy(testy).float().cuda() |
| |
|
| | return trainX, trainy, devX, devy, testX, testy |
| |
|
| | def run(self): |
| | self.nepoch = 0 |
| | bestpr = -1 |
| | early_stop_count = 0 |
| | r = np.arange(1, 6) |
| | stop_train = False |
| |
|
| | |
| | trainX, trainy, devX, devy, testX, testy = self.prepare_data( |
| | self.train['X'], self.train['y'], |
| | self.valid['X'], self.valid['y'], |
| | self.test['X'], self.test['y']) |
| |
|
| | |
| | while not stop_train and self.nepoch <= self.maxepoch: |
| | self.trainepoch(trainX, trainy, nepoches=50) |
| | yhat = np.dot(self.predict_proba(devX), r) |
| | pr = spearmanr(yhat, self.devscores)[0] |
| | pr = 0 if pr != pr else pr |
| | |
| | if pr > bestpr: |
| | bestpr = pr |
| | bestmodel = copy.deepcopy(self.model) |
| | elif self.early_stop: |
| | if early_stop_count >= 3: |
| | stop_train = True |
| | early_stop_count += 1 |
| | self.model = bestmodel |
| |
|
| | yhat = np.dot(self.predict_proba(testX), r) |
| |
|
| | return bestpr, yhat |
| |
|
| | def trainepoch(self, X, y, nepoches=1): |
| | self.model.train() |
| | for _ in range(self.nepoch, self.nepoch + nepoches): |
| | permutation = np.random.permutation(len(X)) |
| | all_costs = [] |
| | for i in range(0, len(X), self.batch_size): |
| | |
| | idx = torch.from_numpy(permutation[i:i + self.batch_size]).long().cuda() |
| | Xbatch = X[idx] |
| | ybatch = y[idx] |
| | output = self.model(Xbatch) |
| | |
| | loss = self.loss_fn(output, ybatch) |
| | all_costs.append(loss.item()) |
| | |
| | self.optimizer.zero_grad() |
| | loss.backward() |
| | |
| | self.optimizer.step() |
| | self.nepoch += nepoches |
| |
|
| | def predict_proba(self, devX): |
| | self.model.eval() |
| | probas = [] |
| | with torch.no_grad(): |
| | for i in range(0, len(devX), self.batch_size): |
| | Xbatch = devX[i:i + self.batch_size] |
| | if len(probas) == 0: |
| | probas = self.model(Xbatch).data.cpu().numpy() |
| | else: |
| | probas = np.concatenate((probas, self.model(Xbatch).data.cpu().numpy()), axis=0) |
| | return probas |
| |
|