small fixes
Browse files
- model/decoders.py +3 -3
- model/encoders.py +1 -1
- model/t5_vae.py +2 -3
- model/vae.py +2 -2
model/decoders.py
CHANGED
|
@@ -12,10 +12,10 @@ class Decoder(nn.Module):
|
|
| 12 |
n_latent_tokens: int
|
| 13 |
|
| 14 |
@nn.compact
|
| 15 |
-    def __call__(self, latent_code):
|
| 16 |
-
raw_latent_tokens = nn.
|
| 17 |
latent_tokens = nn.LayerNorm()(raw_latent_tokens)
|
| 18 |
-
return latent_tokens # (batch,
|
| 19 |
|
| 20 |
|
| 21 |
VAE_DECODER_MODELS = {
|
|
|
|
| 12 |
n_latent_tokens: int
|
| 13 |
|
| 14 |
@nn.compact
|
| 15 |
+    def __call__(self, latent_code): # (batch, latent_tokens_per_sequence, latent_token_dim)
|
| 16 |
+        raw_latent_tokens = nn.Dense(self.dim_model)(latent_code)
|
| 17 |
latent_tokens = nn.LayerNorm()(raw_latent_tokens)
|
| 18 |
+        return latent_tokens # (batch, latent_tokens_per_sequence, dim_model)
|
| 19 |
|
| 20 |
|
| 21 |
VAE_DECODER_MODELS = {
|
model/encoders.py
CHANGED
|
@@ -13,7 +13,7 @@ class Encoder(nn.Module):
|
|
| 13 |
|
| 14 |
@nn.compact
|
| 15 |
def __call__(self, encoding):
|
| 16 |
-
latent_tokens = nn.
|
| 17 |
raw_latent_code = latent_tokens[:, : self.n_tokens, :]
|
| 18 |
latent_code = nn.Tanh()(raw_latent_code)
|
| 19 |
return latent_code # (batch, latent_tokens_per_sequence, latent_token_dim)
|
|
|
|
| 13 |
|
| 14 |
@nn.compact
|
| 15 |
def __call__(self, encoding):
|
| 16 |
+        latent_tokens = nn.Dense(self.latent_size)(encoding)
|
| 17 |
raw_latent_code = latent_tokens[:, : self.n_tokens, :]
|
| 18 |
latent_code = nn.Tanh()(raw_latent_code)
|
| 19 |
return latent_code # (batch, latent_tokens_per_sequence, latent_token_dim)
|
model/t5_vae.py
CHANGED
|
@@ -28,8 +28,7 @@ class FlaxT5_VAE_ForAutoencodingModule(nn.Module):
|
|
| 28 |
return self.t5.decoder
|
| 29 |
|
| 30 |
def setup(self):
|
| 31 |
-
self.
|
| 32 |
-        self.t5 = FlaxT5ForConditionalGenerationModule(self.config)
|
| 33 |
self.vae = VAE(self.config)
|
| 34 |
|
| 35 |
def __call__(
|
|
@@ -79,7 +78,7 @@ class FlaxT5_VAE_ForAutoencodingModule(nn.Module):
|
|
| 79 |
if self.config.tie_word_embeddings:
|
| 80 |
# Rescale output before projecting on vocab
|
| 81 |
# See https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/transformer/transformer.py#L586
|
| 82 |
-
sequence_output = sequence_output * (self.
|
| 83 |
|
| 84 |
if self.config.tie_word_embeddings:
|
| 85 |
shared_embedding = self.shared.variables["params"]["embedding"]
|
|
|
|
| 28 |
return self.t5.decoder
|
| 29 |
|
| 30 |
def setup(self):
|
| 31 |
+        self.t5 = FlaxT5ForConditionalGenerationModule(self.config.t5)
|
|
|
|
| 32 |
self.vae = VAE(self.config)
|
| 33 |
|
| 34 |
def __call__(
|
|
|
|
| 78 |
if self.config.tie_word_embeddings:
|
| 79 |
# Rescale output before projecting on vocab
|
| 80 |
# See https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/transformer/transformer.py#L586
|
| 81 |
+        sequence_output = sequence_output * (self.config.t5.d_model ** -0.5)
|
| 82 |
|
| 83 |
if self.config.tie_word_embeddings:
|
| 84 |
shared_embedding = self.shared.variables["params"]["embedding"]
|
model/vae.py
CHANGED
|
@@ -17,8 +17,8 @@ class VAE(nn.Module):
|
|
| 17 |
dtype: jnp.dtype = jnp.float32 # the dtype of the computation
|
| 18 |
|
| 19 |
def setup(self):
|
| 20 |
-
self.encoder = VAE_ENCODER_MODELS[self.config.
|
| 21 |
-
self.decoder = VAE_DECODER_MODELS[self.config.
|
| 22 |
|
| 23 |
def __call__(self, encoding=None, latent_codes=None):
|
| 24 |
if latent_codes is None:
|
|
|
|
| 17 |
dtype: jnp.dtype = jnp.float32 # the dtype of the computation
|
| 18 |
|
| 19 |
def setup(self):
|
| 20 |
+        self.encoder = VAE_ENCODER_MODELS[self.config.vae_encoder_model](self.config.latent_size, self.config.n_latent_tokens)
|
| 21 |
+        self.decoder = VAE_DECODER_MODELS[self.config.vae_decoder_model](self.config.t5.d_model, self.config.n_latent_tokens)
|
| 22 |
|
| 23 |
def __call__(self, encoding=None, latent_codes=None):
|
| 24 |
if latent_codes is None:
|