Yuchan committed on
Commit
4a93f82
·
verified ·
1 Parent(s): d916c7d

Update Mo.py

Browse files
Files changed (1) hide show
  1. Mo.py +11 -20
Mo.py CHANGED
@@ -14,23 +14,18 @@ tf.random.set_seed(SEED)
14
  np.random.seed(SEED)
15
 
16
  # TPU 초기화
17
- try:
18
- resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="local")
19
- tf.tpu.experimental.initialize_tpu_system(resolver)
20
- strategy = tf.distribute.TPUStrategy(resolver)
21
- print("✅ TPU 초기화 완료:", resolver.cluster_spec().as_dict())
22
- on_tpu = True
23
-
24
- except Exception as e:
25
- print("โš ๏ธ TPU ๋ฏธ์‚ฌ์šฉ, GPU/CPU๋กœ ์ง„ํ–‰:", e)
 
26
  strategy = tf.distribute.get_strategy()
27
- on_tpu = False
28
-
29
- # Mixed precision
30
- from tensorflow.keras import mixed_precision
31
- policy = mixed_precision.Policy("mixed_bfloat16" if on_tpu else "float32")
32
- mixed_precision.set_global_policy(policy)
33
- print("✅ Mixed precision:", policy)
34
 
35
  # =======================
36
  # 1) 파일 다운로드
@@ -237,9 +232,6 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
237
  mean_loss = tf.reduce_sum(per_tok) / (tf.reduce_sum(mask) + 1e-8)
238
  return tf.exp(mean_loss)
239
 
240
- # =======================
241
- # 모델 생성 & 컴파일
242
- # =======================
243
  with strategy.scope():
244
  model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
245
  dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
@@ -251,7 +243,6 @@ with strategy.scope():
251
 
252
  # 학습
253
  history = model.fit(dist_dataset, epochs=1, steps_per_epoch=steps_per_epoch, verbose=1)
254
-
255
  model.save_weights("tf_model.weights.h5")
256
  print("✅ 모델 가중치 저장 완료!")
257
 
 
14
  np.random.seed(SEED)
15
 
16
  # TPU 초기화
17
+ gpus = tf.config.list_physical_devices('GPU')
18
+ if gpus:
19
+ try:
20
+ for gpu in gpus:
21
+ tf.config.experimental.set_memory_growth(gpu, True)
22
+ strategy = tf.distribute.MirroredStrategy(devices=[f"/GPU:{i}" for i in range(len(gpus))])
23
+ print(f"✅ GPU {len(gpus)}개 사용: {strategy.num_replicas_in_sync} replicas")
24
+ except RuntimeError as e:
25
+ print("⚠️ GPU 설정 에러:", e)
26
+ else:
27
  strategy = tf.distribute.get_strategy()
28
+ print("⚠️ GPU 없음, CPU 사용")
 
 
 
 
 
 
29
 
30
  # =======================
31
  # 1) 파일 다운로드
 
232
  mean_loss = tf.reduce_sum(per_tok) / (tf.reduce_sum(mask) + 1e-8)
233
  return tf.exp(mean_loss)
234
 
 
 
 
235
  with strategy.scope():
236
  model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
237
  dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
 
243
 
244
  # 학습
245
  history = model.fit(dist_dataset, epochs=1, steps_per_epoch=steps_per_epoch, verbose=1)
 
246
  model.save_weights("tf_model.weights.h5")
247
  print("✅ 모델 가중치 저장 완료!")
248