import torch
from torch import nn, einsum
from einops import rearrange


class RandomProjectionQuantizer(nn.Module):
    """
    Random projection and codebook lookup module.

    Some code is borrowed from:
    https://github.com/lucidrains/vector-quantize-pytorch/blob/master/vector_quantize_pytorch/random_projection_quantizer.py
    but normalization here uses pre-computed global mean & variance instead of layer norm.
    """

    def __init__(
        self,
        input_dim,
        codebook_dim,
        codebook_size,
        seed=142,
    ):
        super().__init__()

        # Fix the seed so the projection and codebook are reproducible across runs.
        torch.manual_seed(seed)

        # Random projection matrix mapping input features into the codebook space.
        # Registered as a buffer (not a parameter), so it stays frozen during training.
        random_projection = torch.empty(input_dim, codebook_dim)
        nn.init.xavier_normal_(random_projection)
        self.register_buffer("random_projection", random_projection)

        # Randomly initialized codebook, likewise frozen for the whole training run.
        codebook = torch.empty(codebook_size, codebook_dim)
        nn.init.normal_(codebook)
        self.register_buffer("codebook", codebook)

    def codebook_lookup(self, x):
        # x: (batch, num_frames, codebook_dim) -> flatten the batch and time axes
        b = x.shape[0]
        x = rearrange(x, "b n e -> (b n) e")

        # L2-normalize both sides so the Euclidean distance below is
        # monotonic in cosine similarity.
        normalized_x = nn.functional.normalize(x, dim=1, p=2)
        normalized_codebook = nn.functional.normalize(self.codebook, dim=1, p=2)

        # Pairwise distances: (codebook_size, batch * num_frames)
        distances = torch.cdist(normalized_codebook, normalized_x)

        # Index of the nearest codebook entry for each input vector.
        nearest_indices = torch.argmin(distances, dim=0)

        # Restore the batch dimension: (batch, num_frames)
        xq = rearrange(nearest_indices, "(b n) -> b n", b=b)

        return xq

    @torch.no_grad()
    def forward(self, x):
        # The quantizer is never trained, so keep it in eval mode.
        self.eval()

        # Project inputs into the codebook space: (b, n, input_dim) -> (b, n, codebook_dim)
        x = einsum("b n d, d e -> b n e", x, self.random_projection)

        # Return the index of the nearest codebook entry for each frame.
        xq = self.codebook_lookup(x)

        return xq
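

# A minimal usage sketch (not from the original code): the shapes and
# hyper-parameters below are illustrative assumptions, as is the use of
# pre-computed global statistics to normalize the raw features before
# quantization, which the class docstring implies happens outside this module.
if __name__ == "__main__":
    quantizer = RandomProjectionQuantizer(
        input_dim=320,       # assumed per-frame feature dimension
        codebook_dim=16,     # assumed projection / codebook dimension
        codebook_size=8192,  # assumed number of codebook entries
    )

    raw = torch.randn(4, 100, 320)  # (batch, frames, input_dim)

    # Hypothetical stand-in for global mean & variance pre-computed over a dataset.
    global_mean, global_std = raw.mean(), raw.std()
    features = (raw - global_mean) / global_std

    labels = quantizer(features)  # (batch, frames) integer codebook indices
    print(labels.shape, labels.dtype)  # torch.Size([4, 100]) torch.int64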