Update V2.py
V2.py CHANGED
@@ -128,45 +128,6 @@ ds = ds.batch(BATCH_SIZE, drop_remainder=True)
 ds = ds.map(lambda v1, v2: ((v1, v2), tf.zeros([BATCH_SIZE], dtype=tf.float32)), num_parallel_calls=tf.data.AUTOTUNE)
 ds = ds.prefetch(tf.data.AUTOTUNE)
 
-class DynamicConvTPU(layers.Layer):
-    def __init__(self, d_model, k=7):
-        super().__init__()
-        assert k % 2 == 1
-        self.k = k
-        self.d_model = d_model
-
-        self.dense = layers.Dense(d_model, activation='silu')
-        self.proj = layers.Dense(d_model)
-        self.generator = layers.Dense(k, dtype='float32')
-
-    def call(self, x):
-        x_in = x
-        x = tf.cast(x, tf.float32)
-        B, L, D = tf.shape(x)[0], tf.shape(x)[1], tf.shape(x)[2]
-
-        # 1) generate token-wise kernels
-        kernels = self.generator(self.dense(x))  # (B, L, k)
-        kernels = tf.nn.softmax(kernels, axis=-1)
-        kernels_exp = tf.expand_dims(kernels, axis=-1)  # (B, L, k, 1)
-
-        # 2) padding and shifted-patch construction (vectorized)
-        pad = (self.k - 1) // 2
-        x_pad = tf.pad(x, [[0,0],[pad,pad],[0,0]])  # (B, L+k-1, D)
-
-        # build all shifted patches at once
-        idx = tf.range(self.k)[None, :, None] + tf.range(L)[:, None, None]  # (L, k, 1)
-        idx = tf.broadcast_to(idx, [B, L, self.k]) + tf.zeros([B, L, self.k], dtype=tf.int32)  # (B,L,k)
-        batch_idx = tf.reshape(tf.range(B)[:, None, None], [B,1,1])
-        batch_idx = tf.broadcast_to(batch_idx, [B,L,self.k])
-
-        patches = tf.gather(x_pad, idx, axis=1, batch_dims=1)  # (B, L, k, D)
-
-        # 3) token-wise weighted sum
-        out = tf.reduce_sum(patches * kernels_exp, axis=2)  # (B, L, D)
-        out = self.proj(out)
-
-        return tf.cast(out, x_in.dtype)
-
 
 class HyperConv1D(layers.Layer):
     def __init__(self, d_model, k=7, mem_size=64, hyper_dim=128, dropout=0.0):
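For context, the removed DynamicConvTPU computed a token-wise dynamic convolution: each token predicts its own softmax-normalized k-tap kernel, and the output is the weighted sum of a k-wide window centered on that token. Below is a minimal standalone sketch of that idea, not the committed code: the name DynamicConvSketch is made up here, TF 2.x is assumed, and the window indexing is simplified to a single tf.gather instead of the original's broadcast batch indices.

import tensorflow as tf
from tensorflow.keras import layers

class DynamicConvSketch(layers.Layer):
    """Token-wise dynamic 1D convolution: each position predicts its own k-tap kernel."""
    def __init__(self, d_model, k=7):
        super().__init__()
        assert k % 2 == 1  # odd kernel so 'same' padding is symmetric
        self.k = k
        self.dense = layers.Dense(d_model, activation='silu')
        self.proj = layers.Dense(d_model)
        self.generator = layers.Dense(k)

    def call(self, x):                                   # x: (B, L, D)
        L = tf.shape(x)[1]
        # Per-token kernels, normalized over the k taps: (B, L, k)
        kernels = tf.nn.softmax(self.generator(self.dense(x)), axis=-1)
        pad = (self.k - 1) // 2
        x_pad = tf.pad(x, [[0, 0], [pad, pad], [0, 0]])  # (B, L+k-1, D)
        # Window indices for every output position: (L, k)
        idx = tf.range(L)[:, None] + tf.range(self.k)[None, :]
        patches = tf.gather(x_pad, idx, axis=1)          # (B, L, k, D)
        # Weighted sum over the k taps with each token's own kernel
        return self.proj(tf.reduce_sum(patches * kernels[..., None], axis=2))

# Quick shape check on random data:
# y = DynamicConvSketch(d_model=32)(tf.random.normal([2, 16, 32]))  # -> (2, 16, 32)

Gathering with a 2-D idx on axis=1 broadcasts over the batch dimension, so no explicit batch_idx tensor is needed.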