Spaces:

ginipick
/

StyleGen

Paused

App Files Files Community

StyleGen / comfy /ldm /audio /embedders.py

multimodalart HF Staff

Squashing commit

4450790 verified over 1 year ago

raw

history blame contribute delete

3.47 kB

	# code adapted from: https://github.com/Stability-AI/stable-audio-tools

	import torch
	import torch.nn as nn
	from torch import Tensor, einsum
	from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
	from einops import rearrange
	import math
	import comfy.ops

	class LearnedPositionalEmbedding(nn.Module):
	    """Learned Fourier-feature embedding for a continuous scalar (e.g. time).

	    Each input scalar is multiplied by ``dim // 2`` learned frequencies and
	    turned into sine/cosine features; the raw scalar is kept as the first
	    channel, so the output has ``dim + 1`` features per input value.
	    """

	    def __init__(self, dim: int):
	        """Create ``dim // 2`` learnable frequencies; ``dim`` must be even."""
	        super().__init__()
	        assert (dim % 2) == 0
	        # Allocated with torch.empty, i.e. NOT initialised here.
	        # NOTE(review): presumably the values come from a loaded checkpoint - confirm.
	        self.weights = nn.Parameter(torch.empty(dim // 2))

	    def forward(self, x: Tensor) -> Tensor:
	        """Map a 1-D tensor of shape ``(b,)`` to features of shape ``(b, dim + 1)``."""
	        column = rearrange(x, "b -> b 1")
	        frequency_row = rearrange(self.weights, "d -> 1 d")
	        # Broadcast (b, 1) against (1, d) to get one phase per (value, frequency) pair.
	        phases = column * frequency_row * 2 * math.pi
	        # Channel layout: [raw value | sin features | cos features].
	        return torch.cat((column, phases.sin(), phases.cos()), dim=-1)

	def TimePositionalEmbedding(dim: int, out_features: int) -> nn.Module:
	    """Build a two-stage embedder: learned Fourier features, then a linear projection.

	    Args:
	        dim: Size passed to ``LearnedPositionalEmbedding`` (must be even there).
	        out_features: Width of the projected embedding.

	    Returns:
	        An ``nn.Sequential`` of the Fourier stage followed by the linear layer.
	    """
	    fourier_stage = LearnedPositionalEmbedding(dim)
	    # The Fourier stage concatenates the raw scalar with its sin/cos features,
	    # so the linear layer receives dim + 1 input channels.
	    projection = comfy.ops.manual_cast.Linear(in_features=dim + 1, out_features=out_features)
	    return nn.Sequential(fourier_stage, projection)


	class NumberEmbedder(nn.Module):
	    """Embed arbitrary-shaped numeric input into ``features``-wide vectors."""

	    def __init__(
	        self,
	        features: int,
	        dim: int = 256,
	    ):
	        """Store the output width and build the underlying positional embedder.

	        Args:
	            features: Size of each produced embedding vector.
	            dim: Internal size handed to ``TimePositionalEmbedding``.
	        """
	        super().__init__()
	        self.features = features
	        self.embedding = TimePositionalEmbedding(dim=dim, out_features=features)

	    def forward(self, x: Union[List[float], Tensor]) -> Tensor:
	        """Return embeddings of shape ``(*x.shape, features)``.

	        Non-tensor input is converted to a tensor on the same device as the
	        embedding's first parameter.
	        """
	        if not torch.is_tensor(x):
	            param_device = next(self.embedding.parameters()).device
	            x = torch.tensor(x, device=param_device)
	        assert isinstance(x, Tensor)
	        input_shape = x.shape
	        # The embedder works on a flat batch; flatten now and restore the shape afterwards.
	        flat_values = rearrange(x, "... -> (...)")
	        flat_embeddings = self.embedding(flat_values)
	        return flat_embeddings.view(*input_shape, self.features)  # type: ignore


	class Conditioner(nn.Module):
	    """Base class for conditioners; subclasses must implement ``forward``.

	    Holds the input/output widths and an output projection ``proj_out``.
	    """

	    def __init__(
	        self,
	        dim: int,
	        output_dim: int,
	        project_out: bool = False
	    ):
	        """Record the dimensions and choose the output projection.

	        Args:
	            dim: Width of the conditioner's internal representation.
	            output_dim: Width of the emitted conditioning.
	            project_out: Force a linear projection even when the widths match.
	        """
	        super().__init__()

	        self.dim = dim
	        self.output_dim = output_dim

	        # A Linear layer is required when the widths differ or when explicitly
	        # requested; otherwise the projection is a no-op.
	        if project_out or dim != output_dim:
	            self.proj_out = nn.Linear(dim, output_dim)
	        else:
	            self.proj_out = nn.Identity()

	    def forward(self, x):
	        """Not implemented on the base class; always raises ``NotImplementedError``."""
	        raise NotImplementedError()

	class NumberConditioner(Conditioner):
	    """Conditioner for numeric inputs.

	    Takes a list of numbers, clamps and rescales them using the configured
	    ``[min_val, max_val]`` range, and embeds each one with a ``NumberEmbedder``.
	    """

	    def __init__(self,
	                 output_dim: int,
	                 min_val: float=0,
	                 max_val: float=1
	                 ):
	        """Set up the value range and the embedder.

	        Args:
	            output_dim: Width of each embedding (also used as the base ``dim``).
	            min_val: Lower bound used for clamping and normalisation.
	            max_val: Upper bound used for clamping and normalisation.
	        """
	        super().__init__(output_dim, output_dim)

	        self.min_val = min_val
	        self.max_val = max_val

	        self.embedder = NumberEmbedder(features=output_dim)

	    def forward(self, floats, device=None):
	        """Embed ``floats`` and return ``[embeddings, ones]``.

	        Args:
	            floats: Iterable of values convertible with ``float()``.
	            device: Target device; defaults to the device of the embedder's
	                first parameter.

	        Returns:
	            A two-element list: the embeddings with a singleton axis inserted
	            at position 1, and a ``(n, 1)`` tensor of ones on ``device``.
	        """
	        # Coerce every input to a Python float before building the tensor.
	        numeric_values = [float(value) for value in floats]

	        if device is None:
	            device = next(self.embedder.parameters()).device

	        values = torch.tensor(numeric_values).to(device)

	        # Restrict to the configured range, then rescale by (x - min) / (max - min).
	        values = values.clamp(self.min_val, self.max_val)
	        value_range = self.max_val - self.min_val
	        scaled = (values - self.min_val) / value_range

	        # Match the dtype of the embedder's parameters before embedding.
	        param_dtype = next(self.embedder.parameters()).dtype
	        scaled = scaled.to(param_dtype)

	        embeddings = self.embedder(scaled).unsqueeze(1)
	        ones_mask = torch.ones(embeddings.shape[0], 1).to(device)

	        return [embeddings, ones_mask]