import torch
import numpy as np


class SliderProjector(torch.nn.Module):
    def __init__(
        self,
        out_dim,               # dimension of the output token that the projector will generate
        pe_dim,                # dimension of the positional encoding applied to the slider scalar
        n_layers=4,            # number of linear layers in the projector MLP
        is_clip_input=True,    # whether CLIP embeddings are concatenated with the scalar embedding
    ):
        super().__init__()
        self.out_dim = out_dim
        self.pe_dim = pe_dim
        self.is_clip_input = is_clip_input

        # Build the projector layers; n_layers controls the depth of the MLP.
        layers = []
        pe_extender_dim = 768
        # If CLIP embeddings are passed along with the slider scalar, the projector
        # input is the concatenation of both 768-dim embeddings.
        if is_clip_input:
            in_dim = pe_extender_dim + 768
        else:
            in_dim = pe_extender_dim
        # Accumulate Linear + ReLU blocks, then a final Linear without activation.
        for i in range(n_layers - 1):
            layers.append(torch.nn.Linear(in_dim, out_dim))
            layers.append(torch.nn.ReLU())
            in_dim = out_dim
        layers.append(torch.nn.Linear(in_dim, out_dim))

        # A simple linear layer that lifts the positional encoding into a 768-dim space.
        self.pe_extender = torch.nn.Linear(pe_dim, 768)
        # The projector network that maps the (combined) embedding to the output token.
        self.projector = torch.nn.Sequential(*layers)

    # A simple sin/cos encoding of the scalar slider value.
    def posEnc(self, s):
        pe = torch.stack([torch.sin(torch.pi * s), torch.cos(torch.pi * s)], dim=-1)
        return pe

    # Takes the slider scalar (and optionally CLIP embeddings) and projects it to a
    # token embedding used to condition the diffusion model.
    def forward(self, s, clip_embeddings=None):
        # Apply the positional encoding to the input scalar and lift it to 768 dims.
        x_pe = self.posEnc(s)
        x_scale_embedding = self.pe_extender(x_pe)  # (1, 768)
        if clip_embeddings is not None:
            # Concatenate the scalar embedding with the CLIP embedding before projecting.
            x_combined_embedding = torch.cat([x_scale_embedding, clip_embeddings], dim=-1)  # (1, 768 + 768)
            x_proj = self.projector(x_combined_embedding)
        else:
            # Without CLIP embeddings the projector expects a 768-dim input
            # (only valid when the module was built with is_clip_input=False).
            x_proj = self.projector(x_scale_embedding)
        return x_proj
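
# Minimal usage sketch (illustrative, not part of the original module): project a
# slider scalar together with a (1, 768) CLIP embedding into one conditioning token.
# The values are assumptions: pe_dim=2 matches the two sin/cos features produced by
# posEnc, out_dim=768 matches the comment above, and the tensors are stand-ins.
def _demo_slider_projector():
    projector = SliderProjector(out_dim=768, pe_dim=2)
    s = torch.tensor([0.5])          # slider scalar, shape (1,) -> posEnc gives (1, 2)
    clip_emb = torch.randn(1, 768)   # stand-in for a CLIP text/image embedding
    token = projector(s, clip_embeddings=clip_emb)
    return token                     # expected shape: (1, 768)
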
class SliderProjector_wo_clip(torch.nn.Module):
    def __init__(
        self,
        out_dim,               # dimension of the output token that the projector will generate
        pe_dim,                # dimension of the positional encoding applied to the slider scalar
        n_layers=4,            # number of linear layers in the projector MLP
        is_clip_input=False,   # kept for API consistency; CLIP embeddings are not used here
    ):
        super().__init__()
        self.out_dim = out_dim
        self.pe_dim = pe_dim

        # Build the projector layers; n_layers controls the depth of the MLP.
        layers = []
        pe_extender_dim = 768
        # The input is lifted to 768 dims with a linear layer to keep the dimensions
        # consistent with the CLIP-based variant.
        in_dim = pe_extender_dim
        # Accumulate Linear + ReLU blocks, then a final Linear without activation.
        for i in range(n_layers - 1):
            layers.append(torch.nn.Linear(in_dim, out_dim))
            layers.append(torch.nn.ReLU())
            in_dim = out_dim
        layers.append(torch.nn.Linear(in_dim, out_dim))

        # Lift the positional encoding to the same dimension as CLIP embeddings (768).
        self.pe_extender = torch.nn.Linear(pe_dim, 768)
        # The projector network that maps the scalar embedding to the output token.
        self.projector = torch.nn.Sequential(*layers)

    # A simple sin/cos encoding of the scalar slider value.
    def posEnc(self, s):
        pe = torch.stack([torch.sin(torch.pi * s), torch.cos(torch.pi * s)], dim=-1)
        return pe

    # Takes the slider scalar and projects it to a token embedding used to condition
    # the diffusion model.
    def forward(self, s):
        x_pe = self.posEnc(s)
        x_scale_embedding = self.pe_extender(x_pe)
        x_proj = self.projector(x_scale_embedding)
        return x_proj
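
# Minimal usage sketch for the CLIP-free variant (illustrative assumptions as above):
# the scalar is sin/cos encoded, lifted to 768 dims, and projected to the output token.
def _demo_slider_projector_wo_clip():
    projector = SliderProjector_wo_clip(out_dim=768, pe_dim=2)
    s = torch.tensor([0.5])          # slider scalar, shape (1,)
    token = projector(s)
    return token                     # expected shape: (1, 768)


if __name__ == "__main__":
    print(_demo_slider_projector().shape)          # torch.Size([1, 768])
    print(_demo_slider_projector_wo_clip().shape)  # torch.Size([1, 768])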