Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -14,23 +14,18 @@ tf.random.set_seed(SEED)
|
|
| 14 |
np.random.seed(SEED)
|
| 15 |
|
| 16 |
# TPU 초기화
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
except
|
| 25 |
-
|
|
|
|
| 26 |
strategy = tf.distribute.get_strategy()
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# Mixed precision
|
| 30 |
-
from tensorflow.keras import mixed_precision
|
| 31 |
-
policy = mixed_precision.Policy("mixed_bfloat16" if on_tpu else "float32")
|
| 32 |
-
mixed_precision.set_global_policy(policy)
|
| 33 |
-
print("✅ Mixed precision:", policy)
|
| 34 |
|
| 35 |
# =======================
|
| 36 |
# 1) 파일 다운로드
|
|
@@ -237,9 +232,6 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
|
|
| 237 |
mean_loss = tf.reduce_sum(per_tok) / (tf.reduce_sum(mask) + 1e-8)
|
| 238 |
return tf.exp(mean_loss)
|
| 239 |
|
| 240 |
-
# =======================
|
| 241 |
-
# 모델 생성 & 컴파일
|
| 242 |
-
# =======================
|
| 243 |
with strategy.scope():
|
| 244 |
model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
|
| 245 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
|
@@ -251,7 +243,6 @@ with strategy.scope():
|
|
| 251 |
|
| 252 |
# 학습
|
| 253 |
history = model.fit(dist_dataset, epochs=1, steps_per_epoch=steps_per_epoch, verbose=1)
|
| 254 |
-
|
| 255 |
model.save_weights("tf_model.weights.h5")
|
| 256 |
print("✅ 모델 가중치 저장 완료!")
|
| 257 |
|
|
|
|
| 14 |
np.random.seed(SEED)
|
| 15 |
|
| 16 |
# TPU 초기화
|
| 17 |
+
gpus = tf.config.list_physical_devices('GPU')
|
| 18 |
+
if gpus:
|
| 19 |
+
try:
|
| 20 |
+
for gpu in gpus:
|
| 21 |
+
tf.config.experimental.set_memory_growth(gpu, True)
|
| 22 |
+
strategy = tf.distribute.MirroredStrategy(devices=[f"/GPU:{i}" for i in range(len(gpus))])
|
| 23 |
+
print(f"✅ GPU {len(gpus)}개 사용: {strategy.num_replicas_in_sync} replicas")
|
| 24 |
+
except RuntimeError as e:
|
| 25 |
+
print("⚠️ GPU 설정 에러:", e)
|
| 26 |
+
else:
|
| 27 |
strategy = tf.distribute.get_strategy()
|
| 28 |
+
print("⚠️ GPU 없음, CPU 사용")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# =======================
|
| 31 |
# 1) 파일 다운로드
|
|
|
|
| 232 |
mean_loss = tf.reduce_sum(per_tok) / (tf.reduce_sum(mask) + 1e-8)
|
| 233 |
return tf.exp(mean_loss)
|
| 234 |
|
|
|
|
|
|
|
|
|
|
| 235 |
with strategy.scope():
|
| 236 |
model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
|
| 237 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
|
|
|
| 243 |
|
| 244 |
# 학습
|
| 245 |
history = model.fit(dist_dataset, epochs=1, steps_per_epoch=steps_per_epoch, verbose=1)
|
|
|
|
| 246 |
model.save_weights("tf_model.weights.h5")
|
| 247 |
print("✅ 모델 가중치 저장 완료!")
|
| 248 |
|