OpenLab-NLP committed on
Commit
77840d0
·
verified ·
1 Parent(s): 5d9caf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -48
app.py CHANGED
@@ -32,9 +32,9 @@ if not os.path.exists(TOKENIZER_PATH):
32
  TOKENIZER_PATH
33
  )
34
 
35
- MAX_LEN = 128
36
- EMBED_DIM = 384
37
- LATENT_DIM = 384
38
  DROP_RATE = 0.1
39
 
40
  # ===============================
@@ -50,51 +50,45 @@ def encode_sentence(sentence, max_len=MAX_LEN):
50
  def pad_sentence(tokens):
51
  return tokens + [pad_id]*(MAX_LEN - len(tokens))
52
 
 
53
  class DynamicConv(layers.Layer):
54
- def __init__(self, k=7):
55
  super().__init__()
56
- assert k % 2 == 1, "kernel size should be odd for symmetric padding"
57
  self.k = k
58
- # generator는 토큰에 대해 k개의 로짓을 뱉음 -> softmax로 가중치화
59
- self.generator = layers.Dense(k)
60
-
61
  def call(self, x):
62
- # x: (B, L, D)
 
 
63
  B = tf.shape(x)[0]
64
  L = tf.shape(x)[1]
65
  D = tf.shape(x)[2]
66
 
67
- # (B, L, k) logits -> softmax -> (B, L, k)
68
- kernels = self.generator(x)
69
  kernels = tf.nn.softmax(kernels, axis=-1)
70
 
71
- # padding (same)
72
  pad = (self.k - 1) // 2
73
- x_pad = tf.pad(x, [[0, 0], [pad, pad], [0, 0]]) # (B, L+2pad, D)
74
 
75
- # extract patches using tf.image.extract_patches:
76
- # make 4D: (B, H=1, W=L+2pad, C=D)
77
  x_pad_4d = tf.expand_dims(x_pad, axis=1)
78
  patches = tf.image.extract_patches(
79
  images=x_pad_4d,
80
- sizes=[1, 1, self.k, 1],
81
- strides=[1, 1, 1, 1],
82
- rates=[1, 1, 1, 1],
83
  padding='VALID'
84
- ) # (B, 1, L, k*D)
85
-
86
- # reshape -> (B, L, k, D)
87
- patches = tf.reshape(patches, [B, 1, L, self.k * D])
88
- patches = tf.squeeze(patches, axis=1)
89
  patches = tf.reshape(patches, [B, L, self.k, D])
90
 
91
- # kernels: (B, L, k) -> (B, L, k, 1)
92
  kernels_exp = tf.expand_dims(kernels, axis=-1)
93
-
94
- # weighted sum over kernel dim -> (B, L, D)
95
  out = tf.reduce_sum(patches * kernels_exp, axis=2)
 
96
 
97
- return out
 
98
 
99
  class EncoderBlock(tf.keras.layers.Layer):
100
  def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN, num_conv_layers=2):
@@ -105,10 +99,7 @@ class EncoderBlock(tf.keras.layers.Layer):
105
  # MLP / FFN
106
  self.fc1 = layers.Dense(ff_dim)
107
  self.fc2 = layers.Dense(embed_dim)
108
-
109
- # DynamicConv 블록 여러 개 쌓기
110
- self.blocks = [DynamicConv(k=7) for _ in range(num_conv_layers)]
111
-
112
  # LayerNorm
113
  self.ln = layers.LayerNormalization(epsilon=1e-5) # 입력 정규화
114
  self.ln1 = layers.LayerNormalization(epsilon=1e-5) # Conv residual
@@ -120,9 +111,7 @@ class EncoderBlock(tf.keras.layers.Layer):
120
 
121
  # DynamicConv 여러 층 통과
122
  out = x_norm
123
- for block in self.blocks:
124
- out = block(out)
125
-
126
  # Conv residual 연결
127
  x = x_norm + self.ln1(out)
128
 
@@ -138,6 +127,7 @@ class EncoderBlock(tf.keras.layers.Layer):
138
 
139
  return x
140
 
 
141
  class L2NormLayer(layers.Layer):
142
  def __init__(self, axis=1, epsilon=1e-10, **kwargs):
143
  super().__init__(**kwargs)
@@ -145,37 +135,46 @@ class L2NormLayer(layers.Layer):
145
  self.epsilon = epsilon
146
  def call(self, inputs):
147
  return tf.math.l2_normalize(inputs, axis=self.axis, epsilon=self.epsilon)
148
- def get_config(self):
149
- return {"axis": self.axis, "epsilon": self.epsilon, **super().get_config()}
150
 
151
class SentenceEncoder(tf.keras.Model):
    """Encode padded token-id sequences into L2-normalised sentence vectors.

    Pipeline: token + position embeddings -> two ``EncoderBlock`` layers ->
    final LayerNorm -> masked attention pooling over the sequence axis ->
    linear projection to ``latent_dim`` -> L2 normalisation.

    Args:
        vocab_size: number of rows in the token embedding table.
        embed_dim: width of the token/position embeddings and encoder blocks.
        latent_dim: width of the returned sentence vector.
        max_len: number of rows in the position embedding table.
        pad_id: token id treated as padding when pooling.
    """

    def __init__(self, vocab_size, embed_dim=384, latent_dim=384, max_len=128, pad_id=pad_id):
        super().__init__()
        self.pad_id = pad_id
        self.embed = layers.Embedding(vocab_size, embed_dim)
        self.pos_embed = layers.Embedding(input_dim=max_len, output_dim=embed_dim)
        # Build the blocks with this encoder's own width and length instead of
        # the module-level defaults; otherwise a non-default ``embed_dim``
        # produces blocks whose width does not match the embeddings.
        self.blocks = [
            EncoderBlock(embed_dim=embed_dim, seq_len=max_len) for _ in range(2)
        ]
        self.attn_pool = layers.Dense(1)
        self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype=tf.float32)
        # Linear projection (no tanh activation).
        self.latent = layers.Dense(latent_dim, activation=None)
        self.l2norm = L2NormLayer()

    def call(self, x):
        """Return one L2-normalised vector per input sequence.

        Args:
            x: integer token ids of shape (B, L); positions equal to
                ``self.pad_id`` are excluded from pooling.

        Returns:
            Tensor of shape (B, latent_dim).
        """
        positions = tf.range(tf.shape(x)[1])[tf.newaxis, :]
        hidden = self.embed(x) + self.pos_embed(positions)

        # 1.0 for real tokens, 0.0 for padding.
        mask = tf.cast(tf.not_equal(x, self.pad_id), tf.float32)

        for block in self.blocks:
            hidden = block(hidden, mask)
        hidden = self.ln_f(hidden)

        # Attention pooling: one score per token, padding pushed to -1e9 so it
        # receives (almost) no weight after the softmax over the length axis.
        scores = self.attn_pool(hidden)
        scores = tf.where(tf.equal(mask[..., tf.newaxis], 0), -1e9, scores)
        scores = tf.nn.softmax(scores, axis=1)
        pooled = tf.reduce_sum(hidden * scores, axis=1)

        latent = self.latent(pooled)
        # Return the L2-normalised projection.
        return self.l2norm(latent)
 
 
 
179
 
180
  # 3️⃣ 모델 로드
181
  # ===============================
 
32
  TOKENIZER_PATH
33
  )
34
 
35
+ MAX_LEN = 384
36
+ EMBED_DIM = 512
37
+ LATENT_DIM = 512
38
  DROP_RATE = 0.1
39
 
40
  # ===============================
 
50
  def pad_sentence(tokens):
51
  return tokens + [pad_id]*(MAX_LEN - len(tokens))
52
 
53
+
54
  class DynamicConv(layers.Layer):
55
+ def __init__(self, d_model, k=7):
56
  super().__init__()
57
+ assert k % 2 == 1
58
  self.k = k
59
+ self.dense = layers.Dense(d_model, activation='silu')
60
+ self.proj = layers.Dense(d_model)
61
+ self.generator = layers.Dense(k, dtype='float32')
62
  def call(self, x):
63
+ x_in = x
64
+ x = tf.cast(x, tf.float32)
65
+
66
  B = tf.shape(x)[0]
67
  L = tf.shape(x)[1]
68
  D = tf.shape(x)[2]
69
 
70
+ kernels = self.generator(self.dense(x))
 
71
  kernels = tf.nn.softmax(kernels, axis=-1)
72
 
 
73
  pad = (self.k - 1) // 2
74
+ x_pad = tf.pad(x, [[0,0],[pad,pad],[0,0]])
75
 
 
 
76
  x_pad_4d = tf.expand_dims(x_pad, axis=1)
77
  patches = tf.image.extract_patches(
78
  images=x_pad_4d,
79
+ sizes=[1,1,self.k,1],
80
+ strides=[1,1,1,1],
81
+ rates=[1,1,1,1],
82
  padding='VALID'
83
+ )
 
 
 
 
84
  patches = tf.reshape(patches, [B, L, self.k, D])
85
 
 
86
  kernels_exp = tf.expand_dims(kernels, axis=-1)
 
 
87
  out = tf.reduce_sum(patches * kernels_exp, axis=2)
88
+ out = self.proj(out)
89
 
90
+ # 🔥 원래 dtype으로 돌려줌
91
+ return tf.cast(out, x_in.dtype)
92
 
93
  class EncoderBlock(tf.keras.layers.Layer):
94
  def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN, num_conv_layers=2):
 
99
  # MLP / FFN
100
  self.fc1 = layers.Dense(ff_dim)
101
  self.fc2 = layers.Dense(embed_dim)
102
+ self.blocks = [DynamicConv(d_model=embed_dim, k=7) for _ in range(num_conv_layers)]
 
 
 
103
  # LayerNorm
104
  self.ln = layers.LayerNormalization(epsilon=1e-5) # 입력 정규화
105
  self.ln1 = layers.LayerNormalization(epsilon=1e-5) # Conv residual
 
111
 
112
  # DynamicConv 여러 층 통과
113
  out = x_norm
114
+ for block in self.blocks: out = block(out)
 
 
115
  # Conv residual 연결
116
  x = x_norm + self.ln1(out)
117
 
 
127
 
128
  return x
129
 
130
+
131
  class L2NormLayer(layers.Layer):
132
  def __init__(self, axis=1, epsilon=1e-10, **kwargs):
133
  super().__init__(**kwargs)
 
135
  self.epsilon = epsilon
136
  def call(self, inputs):
137
  return tf.math.l2_normalize(inputs, axis=self.axis, epsilon=self.epsilon)
 
 
138
 
139
class SentenceEncoder(Model):
    """Encode padded token-id sequences into L2-normalised sentence vectors.

    Pipeline: token + position embeddings -> dropout -> two ``EncoderBlock``
    layers -> final LayerNorm -> masked attention pooling over the sequence
    axis -> linear projection to ``latent_dim`` -> L2 normalisation.

    Args:
        vocab_size: number of rows in the token embedding table.
        embed_dim: width of the token/position embeddings and encoder blocks.
        latent_dim: width of the returned sentence vector.
        max_len: number of rows in the position embedding table.
        pad_id: token id treated as padding when pooling.
        dropout_rate: dropout rate applied to the summed embeddings.
    """

    # NOTE(review): EMBED_DROPOUT is not defined in the visible part of this
    # file (only DROP_RATE is) -- confirm it exists before this class is created.
    def __init__(self, vocab_size, embed_dim=EMBED_DIM, latent_dim=LATENT_DIM, max_len=MAX_LEN, pad_id=pad_id, dropout_rate=EMBED_DROPOUT):
        super().__init__()
        self.pad_id = pad_id
        self.embed = layers.Embedding(vocab_size, embed_dim)
        self.pos_embed = layers.Embedding(input_dim=max_len, output_dim=embed_dim)
        self.dropout = layers.Dropout(dropout_rate)
        # Build the blocks with this encoder's own width and length instead of
        # the module-level defaults; otherwise a non-default ``embed_dim``
        # produces blocks whose width does not match the embeddings.
        self.blocks = [
            EncoderBlock(embed_dim=embed_dim, seq_len=max_len) for _ in range(2)
        ]
        self.attn_pool = layers.Dense(1)
        self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype=tf.float32)
        self.latent = layers.Dense(latent_dim, activation=None)
        self.l2norm = L2NormLayer(axis=1)

    def call(self, x, training=None):
        """Return one float32, L2-normalised vector per input sequence.

        Args:
            x: integer token ids of shape (B, L); positions equal to
                ``self.pad_id`` are excluded from pooling.
            training: forwarded to the dropout layer and the encoder blocks.

        Returns:
            float32 tensor of shape (B, latent_dim).
        """
        positions = tf.range(tf.shape(x)[1])[tf.newaxis, :]
        x_embed = self.embed(x) + self.pos_embed(positions)
        x_embed = self.dropout(x_embed, training=training)

        # 1.0 for real tokens, 0.0 for padding.
        # NOTE(review): the mask is only used for pooling here; it is not
        # passed to the encoder blocks -- confirm that is intended.
        mask = tf.cast(tf.not_equal(x, self.pad_id), tf.float32)

        h = x_embed
        for block in self.blocks:
            h = block(h, training=training)

        h = self.ln_f(h)

        # Force the attention scores to float32 before masking and softmax.
        scores = self.attn_pool(h)
        scores = tf.cast(scores, tf.float32)

        # Padding positions get -1e9 so they receive (almost) no weight after
        # the softmax over the length axis.
        scores = tf.where(mask[..., tf.newaxis] == 0, tf.constant(-1e9, tf.float32), scores)
        scores = tf.nn.softmax(scores, axis=1)

        pooled = tf.reduce_sum(h * scores, axis=1)
        latent = self.latent(pooled)
        latent = self.l2norm(latent)

        # Only the final output is forced to float32.
        return tf.cast(latent, tf.float32)
178
 
179
  # 3️⃣ 모델 로드
180
  # ===============================