OpenLab-NLP committed on
Commit
38d8cee
·
verified ·
1 Parent(s): 42be625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -50
app.py CHANGED
@@ -22,7 +22,7 @@ TOKENIZER_PATH = "bpe.model"
22
 
23
  if not os.path.exists(MODEL_PATH):
24
  download_file(
25
- "https://huggingface.co/OpenLab-NLP/openlem-prototype/resolve/main/encoder_simcse.weights.h5?download=true",
26
  MODEL_PATH
27
  )
28
 
@@ -50,56 +50,93 @@ def encode_sentence(sentence, max_len=MAX_LEN):
50
  def pad_sentence(tokens):
51
  return tokens + [pad_id]*(MAX_LEN - len(tokens))
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  class EncoderBlock(tf.keras.layers.Layer):
54
- def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN, drop_rate=DROP_RATE):
55
  super().__init__()
56
  self.embed_dim = embed_dim
57
  self.seq_len = seq_len
58
- self.drop_rate = drop_rate
59
 
60
- self.fc1 = layers.Dense(ff_dim*2)
 
61
  self.fc2 = layers.Dense(embed_dim)
62
- self.fc3 = layers.Dense(ff_dim//2)
63
- self.fc4 = layers.Dense(embed_dim)
64
-
65
- self.attn = layers.Dense(1)
66
- self.token_mixer = layers.Dense(seq_len)
67
- self.token_gate = layers.Dense(seq_len, activation='sigmoid')
68
 
69
- self.ln = layers.LayerNormalization(epsilon=1e-5)
70
- self.ln1 = layers.LayerNormalization(epsilon=1e-5)
71
- self.ln2 = layers.LayerNormalization(epsilon=1e-5)
72
- self.ln3 = layers.LayerNormalization(epsilon=1e-5)
73
- self.ln4 = layers.LayerNormalization(epsilon=1e-5)
74
 
75
- self.dropout = layers.Dropout(drop_rate)
 
 
 
76
 
77
- def call(self, x, mask, training=False):
 
78
  x_norm = self.ln(x)
79
 
80
- h = self.fc1(x_norm)
81
- g, v = tf.split(h, 2, axis=-1)
82
- h = tf.nn.silu(g) * v
83
- h = self.fc2(h)
84
 
85
- h = x + self.ln1(h)
 
86
 
87
- scores = self.attn(h)
88
- scores = tf.where(tf.equal(mask[..., tf.newaxis], 0), -1e9, scores)
89
- scores = tf.nn.softmax(scores, axis=1)
90
- attn = h + self.ln2(h * scores)
91
-
92
- v = tf.transpose(attn, [0, 2, 1])
93
- v = self.token_mixer(v) * self.token_gate(v)
94
- v = tf.transpose(v, [0, 2, 1])
95
 
96
- x_norm2 = attn + self.ln3(v)
97
- x = self.fc3(x_norm2)
98
- x = tf.nn.silu(x)
99
- x = self.fc4(x)
100
 
101
- x = self.dropout(x, training=training)
102
- return x_norm2 + self.ln4(x)
103
 
104
  class L2NormLayer(layers.Layer):
105
  def __init__(self, axis=1, epsilon=1e-10, **kwargs):
@@ -112,36 +149,34 @@ class L2NormLayer(layers.Layer):
112
  return {"axis": self.axis, "epsilon": self.epsilon, **super().get_config()}
113
 
114
  class SentenceEncoder(tf.keras.Model):
115
- def __init__(self, vocab_size, embed_dim=EMBED_DIM, latent_dim=LATENT_DIM, max_len=MAX_LEN, pad_id=pad_id, drop_rate=DROP_RATE):
116
  super().__init__()
117
  self.pad_id = pad_id
118
  self.embed = layers.Embedding(vocab_size, embed_dim)
119
  self.pos_embed = layers.Embedding(input_dim=max_len, output_dim=embed_dim)
120
- self.blocks = [EncoderBlock(embed_dim=embed_dim, drop_rate=drop_rate) for _ in range(2)]
121
  self.attn_pool = layers.Dense(1)
122
  self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype=tf.float32)
123
- self.latent = layers.Dense(latent_dim, activation=None)
124
- self.l2norm = L2NormLayer()
125
- self.drop_embed = layers.Dropout(drop_rate)
126
 
127
- def call(self, x, training=False):
128
  positions = tf.range(tf.shape(x)[1])[tf.newaxis, :]
129
  x_embed = self.embed(x) + self.pos_embed(positions)
130
- x_embed = self.drop_embed(x_embed, training=training)
131
-
132
  mask = tf.cast(tf.not_equal(x, self.pad_id), tf.float32)
133
- h = x_embed
134
  for block in self.blocks:
135
- h = block(h, mask, training=training)
136
- h = self.ln_f(h)
137
 
138
- scores = self.attn_pool(h)
139
  scores = tf.where(tf.equal(mask[..., tf.newaxis], 0), -1e9, scores)
140
  scores = tf.nn.softmax(scores, axis=1)
141
- pooled = tf.reduce_sum(h * scores, axis=1)
142
 
143
  latent = self.latent(pooled)
144
- return self.l2norm(latent)
 
145
  # 3️⃣ 모델 로드
146
  # ===============================
147
  encoder = SentenceEncoder(vocab_size=vocab_size)
 
22
 
23
  if not os.path.exists(MODEL_PATH):
24
  download_file(
25
+ "https://huggingface.co/OpenLab-NLP/openlem-prototype/resolve/main/encoder.weights.h5?download=true",
26
  MODEL_PATH
27
  )
28
 
 
50
  def pad_sentence(tokens):
51
  return tokens + [pad_id]*(MAX_LEN - len(tokens))
52
 
53
class DynamicConv(layers.Layer):
    """Per-token dynamic 1-D convolution along the sequence axis.

    A dense "generator" emits ``k`` logits for every token. After a softmax
    they become the weights of a width-``k`` window centred on that token,
    and the output is the weighted sum of the zero-padded window. The same
    ``k`` weights are shared by all channels of a token.

    Args:
        k: Window width; must be odd so the window can be padded
            symmetrically.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).

    Raises:
        ValueError: If ``k`` is not odd.
    """

    def __init__(self, k=7, **kwargs):
        super().__init__(**kwargs)
        # Explicit check instead of `assert`, which is stripped under -O.
        if k % 2 != 1:
            raise ValueError("kernel size should be odd for symmetric padding")
        self.k = k
        # The generator emits k logits per token; softmax turns them into weights.
        self.generator = layers.Dense(k)

    def call(self, x):
        """Apply the dynamic convolution to ``x`` of shape (B, L, D)."""
        shape = tf.shape(x)
        batch, length, depth = shape[0], shape[1], shape[2]

        # (B, L, k) logits -> softmax over the window axis.
        kernels = tf.nn.softmax(self.generator(x), axis=-1)

        # "Same" padding with zeros on both ends of the sequence axis.
        pad = (self.k - 1) // 2
        x_pad = tf.pad(x, [[0, 0], [pad, pad], [0, 0]])  # (B, L + 2*pad, D)

        # tf.image.extract_patches needs a 4-D input: (B, H=1, W=L+2*pad, C=D).
        patches = tf.image.extract_patches(
            images=tf.expand_dims(x_pad, axis=1),
            sizes=[1, 1, self.k, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding='VALID',
        )  # (B, 1, L, k*D)

        # A single reshape replaces the former reshape -> squeeze -> reshape
        # chain; the element order is identical: (B, 1, L, k*D) -> (B, L, k, D).
        patches = tf.reshape(patches, [batch, length, self.k, depth])

        # (B, L, k) -> (B, L, k, 1), then weighted sum over the window -> (B, L, D).
        kernels_exp = tf.expand_dims(kernels, axis=-1)
        return tf.reduce_sum(patches * kernels_exp, axis=2)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        return {"k": self.k, **super().get_config()}
98
+
99
  class EncoderBlock(tf.keras.layers.Layer):
100
+ def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN, num_conv_layers=2):
101
  super().__init__()
102
  self.embed_dim = embed_dim
103
  self.seq_len = seq_len
 
104
 
105
+ # MLP / FFN
106
+ self.fc1 = layers.Dense(ff_dim)
107
  self.fc2 = layers.Dense(embed_dim)
 
 
 
 
 
 
108
 
109
+ # DynamicConv 블록 여러 개 쌓기
110
+ self.blocks = [DynamicConv(k=7) for _ in range(num_conv_layers)]
 
 
 
111
 
112
+ # LayerNorm
113
+ self.ln = layers.LayerNormalization(epsilon=1e-5) # 입력 정규화
114
+ self.ln1 = layers.LayerNormalization(epsilon=1e-5) # Conv residual
115
+ self.ln2 = layers.LayerNormalization(epsilon=1e-5) # FFN residual
116
 
117
+ def call(self, x, mask=None):
118
+ # 입력 정규화
119
  x_norm = self.ln(x)
120
 
121
+ # DynamicConv 여러 층 통과
122
+ out = x_norm
123
+ for block in self.blocks:
124
+ out = block(out)
125
 
126
+ # Conv residual 연결
127
+ x = x_norm + self.ln1(out)
128
 
129
+ # FFN / GLU
130
+ v = out
131
+ h = self.fc1(v)
132
+ g, v_split = tf.split(h, 2, axis=-1)
133
+ h = tf.nn.silu(g) * v_split
134
+ h = self.fc2(h)
 
 
135
 
136
+ # FFN residual 연결
137
+ x = x + self.ln2(h)
 
 
138
 
139
+ return x
 
140
 
141
  class L2NormLayer(layers.Layer):
142
  def __init__(self, axis=1, epsilon=1e-10, **kwargs):
 
149
  return {"axis": self.axis, "epsilon": self.epsilon, **super().get_config()}
150
 
151
  class SentenceEncoder(tf.keras.Model):
152
+ def __init__(self, vocab_size, embed_dim=384, latent_dim=384, max_len=128, pad_id=pad_id):
153
  super().__init__()
154
  self.pad_id = pad_id
155
  self.embed = layers.Embedding(vocab_size, embed_dim)
156
  self.pos_embed = layers.Embedding(input_dim=max_len, output_dim=embed_dim)
157
+ self.blocks = [EncoderBlock() for _ in range(2)]
158
  self.attn_pool = layers.Dense(1)
159
  self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype=tf.float32)
160
+ self.latent = layers.Dense(latent_dim, activation=None) # tanh 제거
161
+ self.l2norm = L2NormLayer() # 추가
 
162
 
163
+ def call(self, x):
164
  positions = tf.range(tf.shape(x)[1])[tf.newaxis, :]
165
  x_embed = self.embed(x) + self.pos_embed(positions)
 
 
166
  mask = tf.cast(tf.not_equal(x, self.pad_id), tf.float32)
167
+ x = x_embed
168
  for block in self.blocks:
169
+ x = block(x, mask)
170
+ x = self.ln_f(x)
171
 
172
+ scores = self.attn_pool(x)
173
  scores = tf.where(tf.equal(mask[..., tf.newaxis], 0), -1e9, scores)
174
  scores = tf.nn.softmax(scores, axis=1)
175
+ pooled = tf.reduce_sum(x * scores, axis=1)
176
 
177
  latent = self.latent(pooled)
178
+ return self.l2norm(latent) # L2 정규화 후 반환
179
+
180
  # 3️⃣ 모델 로드
181
  # ===============================
182
  encoder = SentenceEncoder(vocab_size=vocab_size)