OpenLab-NLP committed on
Commit
a74af7f
·
verified ·
1 Parent(s): 03540a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -67
app.py CHANGED
@@ -22,13 +22,13 @@ TOKENIZER_PATH = "bpe.model"
22
 
23
  if not os.path.exists(MODEL_PATH):
24
  download_file(
25
- "https://huggingface.co/OpenLab-NLP/openlem2-retrieval-qa/resolve/main/encoder_fit.weights.h5?download=true",
26
  MODEL_PATH
27
  )
28
 
29
  if not os.path.exists(TOKENIZER_PATH):
30
  download_file(
31
- "https://huggingface.co/OpenLab-NLP/openlem2-retrieval-qa/resolve/main/bpe.model?download=true",
32
  TOKENIZER_PATH
33
  )
34
 
@@ -60,82 +60,70 @@ def pad_sentence(tokens):
60
  return tokens + [pad_id]*(MAX_LEN - len(tokens))
61
 
62
 
63
- class DynamicConv(layers.Layer):
64
- def __init__(self, d_model, k=7):
65
  super().__init__()
66
  assert k % 2 == 1
67
  self.k = k
68
- self.dense = layers.Dense(d_model, activation='silu')
69
- self.proj = layers.Dense(d_model)
70
- self.generator = layers.Dense(k, dtype='float32')
71
- def call(self, x):
72
- x_in = x
73
- x = tf.cast(x, tf.float32)
74
 
75
- B = tf.shape(x)[0]
76
- L = tf.shape(x)[1]
77
- D = tf.shape(x)[2]
78
 
79
- kernels = self.generator(self.dense(x))
80
- kernels = tf.nn.softmax(kernels, axis=-1)
 
81
 
82
- pad = (self.k - 1) // 2
83
- x_pad = tf.pad(x, [[0,0],[pad,pad],[0,0]])
 
 
 
84
 
85
- x_pad_4d = tf.expand_dims(x_pad, axis=1)
86
- patches = tf.image.extract_patches(
87
- images=x_pad_4d,
88
- sizes=[1,1,self.k,1],
89
- strides=[1,1,1,1],
90
- rates=[1,1,1,1],
91
- padding='VALID'
92
- )
93
- patches = tf.reshape(patches, [B, L, self.k, D])
94
 
95
- kernels_exp = tf.expand_dims(kernels, axis=-1)
96
- out = tf.reduce_sum(patches * kernels_exp, axis=2)
97
- out = self.proj(out)
98
 
99
- # 🔥 원래 dtype으로 돌려줌
100
- return tf.cast(out, x_in.dtype)
 
101
 
102
class EncoderBlock(tf.keras.layers.Layer):
    """Encoder block: stacked DynamicConv layers followed by a SiLU-GLU FFN.

    The input is pre-normalized once; the conv stack and the FFN each get
    their own LayerNorm applied to the residual branch before addition.
    """

    def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN, num_conv_layers=2):
        super().__init__()
        self.embed_dim = embed_dim
        self.seq_len = seq_len

        # Feed-forward (GLU: fc1 output is split into gate and value halves).
        self.fc1 = layers.Dense(ff_dim)
        self.fc2 = layers.Dense(embed_dim)
        self.blocks = [DynamicConv(d_model=embed_dim, k=7) for _ in range(num_conv_layers)]
        # Normalization layers: input, conv-branch residual, FFN-branch residual.
        self.ln = layers.LayerNormalization(epsilon=1e-5)
        self.ln1 = layers.LayerNormalization(epsilon=1e-5)
        self.ln2 = layers.LayerNormalization(epsilon=1e-5)

    def call(self, x, mask=None):
        # Normalize the incoming representation once up front.
        normed = self.ln(x)

        # Run the dynamic-convolution stack.
        conv_out = normed
        for conv in self.blocks:
            conv_out = conv(conv_out)

        # Residual connection for the conv branch.
        x = normed + self.ln1(conv_out)

        # NOTE(review): the FFN reads the raw conv output (`conv_out`), not the
        # residual sum above — looks intentional but worth confirming.
        gate, value = tf.split(self.fc1(conv_out), 2, axis=-1)
        ffn_out = self.fc2(tf.nn.silu(gate) * value)

        # Residual connection for the FFN branch.
        return x + self.ln2(ffn_out)
 
 
 
 
138
 
 
 
 
 
 
 
 
139
 
140
  class L2NormLayer(layers.Layer):
141
  def __init__(self, axis=1, epsilon=1e-10, **kwargs):
@@ -145,18 +133,20 @@ class L2NormLayer(layers.Layer):
145
  def call(self, inputs):
146
  return tf.math.l2_normalize(inputs, axis=self.axis, epsilon=self.epsilon)
147
 
148
- class SentenceEncoder(tf.keras.Model):
149
  def __init__(self, vocab_size, embed_dim=EMBED_DIM, latent_dim=LATENT_DIM, max_len=MAX_LEN, pad_id=pad_id, dropout_rate=EMBED_DROPOUT):
150
  super().__init__()
151
  self.pad_id = pad_id
152
  self.embed = layers.Embedding(vocab_size, embed_dim)
153
  self.pos_embed = layers.Embedding(input_dim=max_len, output_dim=embed_dim)
154
  self.dropout = layers.Dropout(dropout_rate)
155
- self.blocks = [EncoderBlock() for _ in range(2)]
156
  self.attn_pool = layers.Dense(1)
157
  self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype=tf.float32)
158
  self.latent = layers.Dense(latent_dim, activation=None)
159
  self.l2norm = L2NormLayer(axis=1)
 
 
160
 
161
  def call(self, x, training=None):
162
  positions = tf.range(tf.shape(x)[1])[tf.newaxis, :]
@@ -167,8 +157,13 @@ class SentenceEncoder(tf.keras.Model):
167
 
168
  h = x_embed
169
  for block in self.blocks:
170
- h = block(h, training=training)
171
 
 
 
 
 
 
172
  h = self.ln_f(h)
173
 
174
  # 🔥 scores를 float32 강제
 
22
 
23
  if not os.path.exists(MODEL_PATH):
24
  download_file(
25
+ "https://huggingface.co/OpenLab-NLP/openlem3-retrieval-qa/resolve/main/encoder_fit.weights.h5?download=true",
26
  MODEL_PATH
27
  )
28
 
29
  if not os.path.exists(TOKENIZER_PATH):
30
  download_file(
31
+ "https://huggingface.co/OpenLab-NLP/openlem3-retrieval-qa/resolve/main/bpe.model?download=true",
32
  TOKENIZER_PATH
33
  )
34
 
 
60
  return tokens + [pad_id]*(MAX_LEN - len(tokens))
61
 
62
 
63
class HyperConv1D(layers.Layer):
    """Hyper-conditioned depthwise convolution with an associative memory.

    Pipeline per call:
      1. project the input to ``d_model``;
      2. depthwise conv over the sequence, gated by a per-feature scale that a
         hypernetwork predicts from an attention-pooled global summary;
      3. read from a learned key/value memory via scaled dot-product attention;
      4. fuse both paths with a residual LayerNorm and a SiLU activation.

    The result is cast back to the caller's dtype (mixed-precision friendly).
    ``dropout`` is accepted for interface compatibility but currently unused.
    """

    def __init__(self, d_model, k=7, mem_size=64, hyper_dim=128, dropout=0.0):
        super().__init__()
        # Odd kernel width keeps the receptive field centred on each token.
        assert k % 2 == 1
        self.k = k
        self.d_model = d_model
        self.mem_size = mem_size

        # Input projection.
        self.input_proj = layers.Dense(d_model, name="input_proj")

        # Local depthwise convolution branch.
        self.local_conv = layers.DepthwiseConv1D(kernel_size=k, padding='same', activation='silu')
        self.local_proj = layers.Dense(d_model, name="local_proj")

        # Hypernetwork: global summary vector -> per-feature scale.
        self.hyper = tf.keras.Sequential([
            layers.Dense(hyper_dim, activation='gelu'),
            layers.Dense(d_model)
        ], name="hyper")

        # Associative memory (learned keys and values).
        self.mem_keys = self.add_weight((mem_size, d_model), initializer='glorot_uniform', trainable=True)
        self.mem_vals = self.add_weight((mem_size, d_model), initializer='glorot_uniform', trainable=True)
        self.mem_proj = layers.Dense(d_model)

        self.norm = layers.LayerNormalization()
        self.attn_pool = layers.Dense(1)

    def call(self, x):
        # Remember the caller's dtype so the output matches it.
        caller_dtype = x.dtype

        # 1) Input projection, cast to the memory's dtype so the matmuls
        #    against the memory weights are well-typed.
        weight_dtype = self.mem_keys.dtype
        projected = tf.cast(self.input_proj(x), weight_dtype)

        # 2) Local conv branch, modulated by the hypernetwork.
        local = self.local_conv(projected)

        # Attention-pooled global summary: softmax over the sequence axis.
        pool_logits = tf.nn.softmax(self.attn_pool(projected), axis=1)
        summary = tf.reduce_sum(projected * pool_logits, axis=1)

        # Sigmoid scale broadcast over the sequence dimension.
        gate = tf.expand_dims(tf.nn.sigmoid(self.hyper(summary)), 1)
        local = self.local_proj(local * gate)

        # 3) Associative memory read (scaled dot-product attention).
        scores = tf.matmul(projected, self.mem_keys, transpose_b=True) / tf.math.sqrt(tf.cast(self.d_model, weight_dtype))
        recalled = self.mem_proj(tf.matmul(tf.nn.softmax(scores, axis=-1), self.mem_vals))

        # 4) Fuse branches, residual-normalize, activate.
        fused = tf.nn.silu(self.norm(projected + (local + recalled)))

        # Cast back to the original input dtype.
        return tf.cast(fused, caller_dtype)
127
 
128
  class L2NormLayer(layers.Layer):
129
  def __init__(self, axis=1, epsilon=1e-10, **kwargs):
 
133
  def call(self, inputs):
134
  return tf.math.l2_normalize(inputs, axis=self.axis, epsilon=self.epsilon)
135
 
136
+ class SentenceEncoder(Model):
137
  def __init__(self, vocab_size, embed_dim=EMBED_DIM, latent_dim=LATENT_DIM, max_len=MAX_LEN, pad_id=pad_id, dropout_rate=EMBED_DROPOUT):
138
  super().__init__()
139
  self.pad_id = pad_id
140
  self.embed = layers.Embedding(vocab_size, embed_dim)
141
  self.pos_embed = layers.Embedding(input_dim=max_len, output_dim=embed_dim)
142
  self.dropout = layers.Dropout(dropout_rate)
143
+ self.blocks = [HyperConv1D(d_model=embed_dim, k=7, mem_size=128, hyper_dim=256) for _ in range(4)]
144
  self.attn_pool = layers.Dense(1)
145
  self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype=tf.float32)
146
  self.latent = layers.Dense(latent_dim, activation=None)
147
  self.l2norm = L2NormLayer(axis=1)
148
+ self.fc1 = layers.Dense(1152)
149
+ self.fc2 = layers.Dense(embed_dim)
150
 
151
  def call(self, x, training=None):
152
  positions = tf.range(tf.shape(x)[1])[tf.newaxis, :]
 
157
 
158
  h = x_embed
159
  for block in self.blocks:
160
+ h = block(h)
161
 
162
+ v = h
163
+ h = self.fc1(v)
164
+ g, v_split = tf.split(h, 2, axis=-1)
165
+ h = tf.nn.silu(g) * v_split
166
+ h = self.fc2(h)
167
  h = self.ln_f(h)
168
 
169
  # 🔥 scores를 float32 강제