| import random |
| import torch |
| from torch.utils.data import DataLoader, TensorDataset |
| from utils.utils import sample_categorical_logits |
| import numpy as np |
| from tqdm import tqdm |
| import torch.distributed as dist |
| import torch.nn.functional as F |
|
|
def to_one_hot(x_idx, num_classes=4):
    """Convert integer token indices [B, D] to float one-hot encodings [B, D, num_classes]."""
    oh = F.one_hot(x_idx.long(), num_classes=num_classes)
    return oh.float()
|
|
def rnd(model, reward_model, batch_size, scale=1, device='cuda:0'):
    r"""
    Run random-order sampling and accumulate the log Radon-Nikodym derivative (RND)
    $\log\frac{dP^*}{dP^u}$ along the trajectory, where $P^u$ is the path measure
    of the any-order autoregressive model and $P^*$ reweights a uniform reference
    by $e^{\mathrm{scale}\cdot r(X)}$.
    reward_model: r(X)

    return:
        - x: the final samples, [B, D]
        - log_rnd: the log RND along this trajectory, [B]
    """
    if hasattr(model, 'module'):
        model = model.module

    # Start fully masked; the mask token is vocab_size - 1.
    x = torch.full((batch_size, model.length), model.vocab_size - 1,
                   device=device, dtype=torch.int64)
    batch_arange = torch.arange(batch_size, device=device)
    # One random permutation of positions per row: the order in which tokens are revealed.
    jump_pos = torch.rand(x.shape, device=device).argsort(dim=-1)

    log_rnd = torch.zeros(batch_size, device=device)
    for d in range(model.length - 1, -1, -1):
        # Drop the mask-token column; the model is assumed to return normalized
        # log-probabilities over the remaining vocabulary.
        logits = model(x)[:, :, :-1]
        update = sample_categorical_logits(
            logits[batch_arange, jump_pos[:, d]])
        if torch.is_grad_enabled():
            # Clone to avoid in-place edits on a tensor needed for backprop.
            x = x.clone()
        x[batch_arange, jump_pos[:, d]] = update
        # Uniform-reference log-probability minus the model's log-probability
        # of the revealed token.
        log_rnd += -np.log(model.vocab_size - 1) - logits[batch_arange, jump_pos[:, d], update]
    # The reward enters once, evaluated on the fully unmasked sample.
    log_rnd += scale * reward_model(x)
    return x, log_rnd
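

# A minimal fine-tuning sketch, assuming an `optimizer` over the sampler's
# parameters (the optimizer and the choice of objective here are illustrative,
# not fixed by this file): `rnd` keeps the gradient path through the logits,
# so its output can feed any of the losses defined below.
def _example_rnd_finetune_step(model, reward_model, optimizer, batch_size=64, scale=1.0):
    """Illustrative single step; swap `loss_lv` for `loss_ce` or `loss_re_rf`."""
    _, log_rnd = rnd(model, reward_model, batch_size, scale=scale)
    loss = loss_lv(log_rnd)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()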
|
|
@torch.no_grad()
def sampling(model, batch_size, rounds=1, device='cuda:0'):
    """Any-order autoregressive sampling; returns [rounds * batch_size, D] samples."""
    if hasattr(model, 'module'):
        model = model.module
    batch_arange = torch.arange(batch_size, device=device)
    all_samples = []
    for _ in tqdm(range(rounds), leave=False):
        # Start fully masked and reveal positions in a random order per row.
        x = torch.full((batch_size, model.length), model.vocab_size - 1,
                       device=device, dtype=torch.int64)
        jump_pos = torch.rand(x.shape, device=device).argsort(dim=-1)

        for d in tqdm(range(model.length - 1, -1, -1), leave=False):
            # Drop the mask-token column before sampling the revealed token.
            logits = model.logits(x)[:, :, :-1]
            update = sample_categorical_logits(
                logits[batch_arange, jump_pos[:, d]])
            x[batch_arange, jump_pos[:, d]] = update
        all_samples.append(x)
    return torch.cat(all_samples)
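

# Usage sketch (illustrative only; the one-hot step assumes a downstream
# consumer, e.g. a reward model that expects one-hot inputs, which this file
# does not fix):
def _example_generate(model, batch_size=128, rounds=4, device='cuda:0'):
    """Draw [rounds * batch_size, L] samples and one-hot encode them."""
    samples = sampling(model, batch_size, rounds=rounds, device=device)
    return to_one_hot(samples, num_classes=model.vocab_size - 1)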
|
|
def loss_ce(log_rnd):
    """Cross-entropy loss KL(P^*||P^u), estimated with self-normalized importance weights over the batch."""
    weights = log_rnd.detach().softmax(dim=-1)
    return (log_rnd * weights).sum()
|
|
def loss_lv(log_rnd):
    r"""Log-variance loss $\mathrm{Var}_{P^{\bar u}}\log\frac{dP^*}{dP^u}$"""
    return log_rnd.var()
|
|
def loss_re_rf(log_rnd, const=0):
    r"""Relative entropy loss KL(P^u||P^*) with the REINFORCE trick; `const` is a baseline for variance reduction."""
    return (-log_rnd * (-log_rnd.detach() + const)).mean()
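

# A tiny self-check, illustrative only: all three objectives above consume the
# same [B] tensor of log Radon-Nikodym derivatives and return a scalar that is
# differentiable with respect to it.
def _losses_smoke_test():
    log_rnd = torch.randn(8, requires_grad=True)
    for fn in (loss_ce, loss_lv, loss_re_rf):
        loss = fn(log_rnd)
        assert loss.dim() == 0
        loss.backward()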
|
|
def loss_wdce(policy_model, log_rnd, x, num_replicates=16, weight_func=lambda l: 1/l, eps=1e-3, centering=False):
    r"""
    Weighted denoising cross-entropy loss: samples $X \sim P^u$ reweighted by
    $\log\frac{dP^*}{dP^u}(X)$, self-normalized over the batch.

    log_rnd: [B]; x: [B, L] (no mask)
    num_replicates: R, number of replicates of each row in x
    weight_func: w(lambda) for each sample, 1/lambda by default
    """
    # Unwrap DDP before touching model attributes.
    if hasattr(policy_model, 'module'):
        policy_model = policy_model.module
    mask_index = policy_model.mask_index

    batch = x.repeat_interleave(num_replicates, dim=0)

    # Self-normalized importance weights; use detach() (not in-place detach_())
    # so the caller's tensor is left untouched.
    batch_weights = log_rnd.detach().softmax(dim=-1)
    if centering:
        batch_weights = batch_weights - batch_weights.mean(dim=-1, keepdim=True)

    batch_weights = batch_weights.repeat_interleave(num_replicates, dim=0)

    # One masking level lambda per (replicated) row.
    lamda = torch.rand(batch.shape[0], device=batch.device)
    lamda_weights = weight_func(lamda).clamp(max=1e5)

    # Mask each token independently with probability lambda.
    masked_index = torch.rand(*batch.shape, device=batch.device) < lamda[..., None]
    perturbed_batch = torch.where(masked_index, mask_index, batch)

    t = lamda
    sigma_t = -torch.log1p(-(1 - eps) * t)
    attn_mask = torch.ones_like(perturbed_batch).to(policy_model.device)

    # The model is assumed to return normalized log-probabilities; gather the
    # log-probability of the ground-truth token at each masked position.
    logits = policy_model(perturbed_batch, attn_mask=attn_mask, sigma=sigma_t)
    losses = torch.zeros(*batch.shape, device=batch.device, dtype=logits.dtype)
    losses[masked_index] = torch.gather(input=logits[masked_index], dim=-1,
                                        index=batch[masked_index][..., None]).squeeze(-1)
    return - (losses.sum(dim=-1) * lamda_weights * batch_weights).mean()
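

# Sketch of how `loss_wdce` composes with `rnd` (hypothetical `policy_model`
# and `optimizer` names; assumes `policy_model` exposes the
# forward(x, attn_mask=..., sigma=...) interface used above). The weights are
# detached inside `loss_wdce`, so `rnd` can run without gradients here.
def _example_wdce_step(model, policy_model, reward_model, optimizer,
                       batch_size=32, scale=1.0):
    with torch.no_grad():
        x, log_rnd = rnd(model, reward_model, batch_size, scale=scale)
    loss = loss_wdce(policy_model, log_rnd, x, num_replicates=16)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()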
|
|
def loss_dce(model, x, weight_func=lambda l: 1/l):
    r"""
    Denoising cross-entropy loss; x [B, D] are ground-truth samples.
    weight_func: w(lambda) for each sample, 1/lambda by default
    """
    # One masking level lambda per row; mask each token independently with
    # probability lambda (the mask token is vocab_size - 1).
    lamda = torch.rand(x.shape[0], device=x.device)
    lamda_weights = weight_func(lamda).clamp(max=1e5)
    masked_index = torch.rand(*x.shape, device=x.device) < lamda[..., None]
    perturbed_batch = torch.where(masked_index, model.vocab_size-1, x)
    # Gather the model's log-probability of the ground-truth token at each
    # masked position; unmasked positions contribute zero.
    logits = model(perturbed_batch)
    losses = torch.zeros(*x.shape, device=x.device, dtype=logits.dtype)
    losses[masked_index] = torch.gather(input=logits[masked_index], dim=-1,
                                        index=x[masked_index][..., None]).squeeze(-1)
    return - (losses.sum(dim=-1) * lamda_weights).mean()
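

# Hypothetical pretraining loop for `loss_dce`; pairing it with a DataLoader
# over integer [B, D] tensors is an assumption suggested by the imports at the
# top of this file, not an interface this module defines.
def _example_dce_epoch(model, dataloader, optimizer, device='cuda:0'):
    for (x,) in dataloader:  # e.g. a DataLoader over a TensorDataset of token indices
        loss = loss_dce(model, x.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()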