Yuchan committed
Commit b917a71 · verified · 1 Parent(s): 4a93f82

Update Mo.py

Files changed (1)
  1. Mo.py (+18 -12)
Mo.py CHANGED
@@ -6,7 +6,11 @@ import requests
 from tensorflow import keras
 from tensorflow.keras import layers
 import tensorflow.keras.backend as K
-
+# ===============================
+from tensorflow.keras import mixed_precision
+policy = mixed_precision.Policy('mixed_float16')  # fp16
+mixed_precision.set_global_policy(policy)
+print("✅ Mixed precision applied:", policy)
 print('1')
 tf.get_logger().setLevel("ERROR")
 SEED = 42
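
Note: the added block turns on Keras mixed precision globally, so layers compute in float16 while their variables stay float32. What the diff does not show is loss scaling: with fp16 activations, a custom training loop normally wraps its optimizer so small gradients do not underflow. A minimal sketch of that standard Keras pattern, assuming a hypothetical custom loop (none of this is part of the commit):

```python
import tensorflow as tf
from tensorflow.keras import mixed_precision

mixed_precision.set_global_policy('mixed_float16')  # fp16 compute, fp32 variables

# Hypothetical custom loop: the wrapper multiplies the loss by a scale factor
# so scaled fp16 gradients survive the backward pass without underflowing.
optimizer = mixed_precision.LossScaleOptimizer(tf.keras.optimizers.Adam(1e-3))

def train_step(model, loss_fn, x, y):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)          # fp16 activations inside
        loss = loss_fn(y, logits)                 # fp32 logits -> fp32 loss
        scaled = optimizer.get_scaled_loss(loss)  # apply the loss scale
    scaled_grads = tape.gradient(scaled, model.trainable_variables)
    grads = optimizer.get_unscaled_gradients(scaled_grads)  # undo the scale
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss
```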
@@ -63,8 +67,8 @@ unk_id = sp.piece_to_id("<unk>")
 vocab_size = sp.get_piece_size()
 print(f"✅ Vocabulary size: {vocab_size}")
 
-max_len = 256
-batch_size = 128
+max_len = 200
+batch_size = 96
 
 def text_to_ids(text):
     return sp.encode(text, out_type=int)
@@ -169,20 +173,22 @@ class MHLA(layers.Layer):
 class Lo(layers.Layer):
     def __init__(self, d_model):
         super().__init__()
-        self.d = layers.Dense(64, activation='silu')
-        self.w = layers.Dense(d_model)
-        self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
+        self.d = layers.Dense(64, activation='silu', dtype='float16')  # fp16 compute
+        self.w = layers.Dense(d_model, dtype='float16')  # fp16 compute
+        self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')  # fp32
 
     def call(self, x):
         p = self.d(x)
         p = self.w(p)
-        return self.norm(p) + x
+        p = self.norm(p)  # runs in fp32
+        return tf.cast(p, x.dtype) + x  # cast back to fp16 for the residual add
+
 
 class Block(layers.Layer):
     def __init__(self, d_model):
         super().__init__()
         self.lou = MHLA(d_model, 8)
-        self.glu = SwiGLU(d_model, 1154)
+        self.glu = SwiGLU(d_model, 1048)
         self.lo = Lo(d_model)
 
     def call(self, x):
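
Note: the cast in `Lo.call` is required, not cosmetic. Under `mixed_float16` the input `x` is float16, while the float32 `LayerNormalization` returns float32, and TensorFlow does not implicitly add tensors of different dtypes; `tf.cast(p, x.dtype)` restores a matching dtype before the residual add. A quick illustrative check of the dtype flow (the shape and width are assumptions):

```python
x = tf.random.normal((2, 200, 512), dtype=tf.float16)
lo = Lo(512)
y = lo(x)
print(lo.d.compute_dtype)  # float16: Dense math runs in half precision
print(lo.norm.dtype)       # float32: mean/variance stay numerically stable
print(y.dtype)             # float16 again, ready for the next residual block
```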
@@ -193,10 +199,10 @@ class Block(layers.Layer):
 class LaSLM(tf.keras.Model):
     def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
         super().__init__()
-        self.token_embedding = layers.Embedding(vocab_size, d_model)
-        self.pos_embedding = layers.Embedding(max_seq_len, d_model)
+        self.token_embedding = layers.Embedding(vocab_size, d_model, dtype=policy.compute_dtype)
+        self.pos_embedding = layers.Embedding(max_seq_len, d_model, dtype=policy.compute_dtype)
         self.blocks = [Block(d_model) for _ in range(n_layers)]
-        self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype="float32")
+        self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype='float32')  # ln_f stays fp32
 
     def call(self, x, training=False):
         batch_size, seq_len = tf.shape(x)[0], tf.shape(x)[1]
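
Note: `dtype=policy.compute_dtype` has a subtle effect here. Passing a plain dtype string to a Keras layer sets both its compute and its variable dtype, so these embedding tables are themselves stored in float16, unlike the fp32 variables the global `mixed_float16` policy keeps elsewhere. An illustrative check (sizes are placeholders):

```python
policy = mixed_precision.Policy('mixed_float16')
print(policy.compute_dtype, policy.variable_dtype)  # float16 float32

emb = layers.Embedding(1000, 64, dtype=policy.compute_dtype)
emb(tf.constant([[1, 2, 3]]))     # trigger build
print(emb.embeddings.dtype)       # float16: the weights themselves are half precision
```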
@@ -207,7 +213,7 @@ class LaSLM(tf.keras.Model):
         x = self.ln_f(x)
         embedding_matrix = tf.cast(self.token_embedding.embeddings, x.dtype)
         logits = tf.matmul(x, embedding_matrix, transpose_b=True)
-        return tf.cast(logits, tf.float32)
+        return tf.cast(logits, tf.float32)  # cast to fp32 for the loss computation
 
 def smoothed_loss_keras(y_true, y_pred, eps=0.1):
     y_true = tf.cast(y_true, tf.int32)
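
Note: the final cast is what keeps the loss stable. float16 overflows above roughly 65504, so exponentiating fp16 logits inside softmax/cross-entropy can yield inf or NaN; computing the loss on float32 logits avoids this. A small illustration:

```python
logits_f16 = tf.constant([20.0, 0.0], dtype=tf.float16)
print(tf.exp(logits_f16))                              # [inf, 1]: exp(20) ~ 4.9e8 overflows fp16
print(tf.nn.softmax(tf.cast(logits_f16, tf.float32)))  # finite and stable in fp32
```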
 