Spaces:
Running
Running
| import tensorflow as tf | |
| from tensorflow.keras import layers | |
class PatchExtract(layers.Layer):
    """Split a batch of images into flattened, non-overlapping patches.

    Args:
        patch_size: Sequence ``(size_x, size_y)`` with the patch extent along
            the first and second spatial axes passed to
            ``tf.image.extract_patches``. A one-element sequence is treated
            as a square patch.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size_x = patch_size[0]
        # Previously index 0 was used for both axes, which silently ignored
        # the second entry and made rectangular patches impossible.
        self.patch_size_y = patch_size[1] if len(patch_size) > 1 else patch_size[0]

    def call(self, images):
        """Return patches reshaped to ``(batch, num_patches, patch_dim)``.

        ``images`` is passed straight to ``tf.image.extract_patches``, which
        expects a 4-D tensor; the patch grid produced there is flattened into
        a single ``num_patches`` axis.
        """
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=(1, self.patch_size_x, self.patch_size_y, 1),
            strides=(1, self.patch_size_x, self.patch_size_y, 1),
            rates=(1, 1, 1, 1),
            padding="VALID",
        )
        patch_dim = patches.shape[-1]
        grid_rows, grid_cols = patches.shape[1], patches.shape[2]
        if grid_rows is None or grid_cols is None:
            # Spatial size unknown at trace time: let reshape infer the count.
            num_patches = -1
        else:
            # Use rows * cols rather than rows squared so that non-square
            # patch grids are flattened correctly.
            num_patches = grid_rows * grid_cols
        return tf.reshape(patches, (batch_size, num_patches, patch_dim))

    def get_config(self):
        """Return the layer config, including both patch extents."""
        config = super().get_config()
        config.update(
            {
                "patch_size_y": self.patch_size_y,
                "patch_size_x": self.patch_size_x,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from the dict produced by ``get_config``.

        ``get_config`` stores the two extents under separate keys while the
        constructor takes a single ``patch_size`` sequence, so the default
        ``cls(**config)`` cannot be used.
        """
        config = dict(config)
        size_x = config.pop("patch_size_x")
        size_y = config.pop("patch_size_y")
        return cls(patch_size=(size_x, size_y), **config)
class PatchEmbedding(layers.Layer):
    """Linearly project patches and add a learned position embedding.

    Args:
        num_patch: Number of positions; used as the embedding table size and
            as the length of the position index range.
        embed_dim: Output width of both the dense projection and the
            position embedding.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, num_patch, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patch = num_patch
        # Kept so get_config can serialize every required constructor argument.
        self.embed_dim = embed_dim
        self.proj = layers.Dense(embed_dim)
        self.pos_embed = layers.Embedding(input_dim=num_patch, output_dim=embed_dim)

    def call(self, patch):
        """Return ``proj(patch)`` plus the embedding of positions ``0..num_patch-1``."""
        pos = tf.range(start=0, limit=self.num_patch, delta=1)
        return self.proj(patch) + self.pos_embed(pos)

    def get_config(self):
        """Return a config from which the layer can be rebuilt via ``from_config``."""
        config = super().get_config()
        config.update(
            {
                "num_patch": self.num_patch,
                # Previously omitted, which made from_config fail on the
                # missing required ``embed_dim`` argument.
                "embed_dim": self.embed_dim,
            }
        )
        return config
class PatchMerging(layers.Layer):
    """Merge each 2x2 neighbourhood of patches into one token of width ``2 * embed_dim``.

    Args:
        num_patch: Pair ``(height, width)`` describing the patch grid that the
            flattened input is reshaped to.
        embed_dim: Base width; the bias-free output projection has
            ``2 * embed_dim`` units.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``). Accepting them
            also lets ``from_config`` rebuild the layer, since ``get_config``
            includes the base-layer entries.
    """

    def __init__(self, num_patch, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patch = num_patch
        self.embed_dim = embed_dim
        self.linear_trans = layers.Dense(2 * embed_dim, use_bias=False)

    def call(self, x):
        """Return ``(merged_tokens, feat_maps)``.

        ``x`` is unpacked as a rank-3 tensor and reshaped to
        ``(-1, height, width, C)``; that reshaped grid is returned unchanged as
        ``feat_maps``. The four interleaved sub-grids are concatenated on the
        channel axis and projected, giving ``(height // 2) * (width // 2)``
        tokens. NOTE: odd ``height`` or ``width`` makes the strided slices
        differ in size, so the concat would fail; even grid sizes are assumed.
        """
        height, width = self.num_patch
        _, _, C = x.get_shape().as_list()
        x = tf.reshape(x, shape=(-1, height, width, C))
        feat_maps = x

        # Sample the four positions of every 2x2 window.
        x0 = x[:, 0::2, 0::2, :]
        x1 = x[:, 1::2, 0::2, :]
        x2 = x[:, 0::2, 1::2, :]
        x3 = x[:, 1::2, 1::2, :]
        x = tf.concat((x0, x1, x2, x3), axis=-1)
        x = tf.reshape(x, shape=(-1, (height // 2) * (width // 2), 4 * C))
        return self.linear_trans(x), feat_maps

    def get_config(self):
        """Return the layer config including ``num_patch`` and ``embed_dim``."""
        config = super().get_config()
        config.update({"num_patch": self.num_patch, "embed_dim": self.embed_dim})
        return config