OpenLab-NLP/model-prototype

Model card · Files and versions

Yuchan committed 27 days ago

Commit 4a93f82 · verified · 1 Parent(s): d916c7d

Update Mo.py

Files changed (1) hide show

Mo.py +11 -20

Mo.py CHANGED Viewed

@@ -14,23 +14,18 @@ tf.random.set_seed(SEED)
 np.random.seed(SEED)
 # TPU 초기화
-try:
-    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="local")
-    tf.tpu.experimental.initialize_tpu_system(resolver)
-    strategy = tf.distribute.TPUStrategy(resolver)
-    print("✅ TPU 초기화 완료:", resolver.cluster_spec().as_dict())
-    on_tpu = True
-except Exception as e:
-    print("⚠️ TPU 미사용, GPU/CPU로 진행:", e)
     strategy = tf.distribute.get_strategy()
-    on_tpu = False
-# Mixed precision
-from tensorflow.keras import mixed_precision
-policy = mixed_precision.Policy("mixed_bfloat16" if on_tpu else "float32")
-mixed_precision.set_global_policy(policy)
-print("✅ Mixed precision:", policy)
 # =======================
 # 1) 파일 다운로드
@@ -237,9 +232,6 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
     mean_loss = tf.reduce_sum(per_tok) / (tf.reduce_sum(mask) + 1e-8)
     return tf.exp(mean_loss)
-# =======================
-# 모델 생성 & 컴파일
-# =======================
 with strategy.scope():
     model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
     dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
@@ -251,7 +243,6 @@ with strategy.scope():
     # 학습
     history = model.fit(dist_dataset, epochs=1, steps_per_epoch=steps_per_epoch, verbose=1)
 model.save_weights("tf_model.weights.h5")
 print("✅ 모델 가중치 저장 완료!")

 np.random.seed(SEED)
 # TPU 초기화
+gpus = tf.config.list_physical_devices('GPU')
+if gpus:
+    try:
+        for gpu in gpus:
+            tf.config.experimental.set_memory_growth(gpu, True)
+        strategy = tf.distribute.MirroredStrategy(devices=[f"/GPU:{i}" for i in range(len(gpus))])
+        print(f"✅ GPU {len(gpus)}개 사용: {strategy.num_replicas_in_sync} replicas")
+    except RuntimeError as e:
+        print("⚠️ GPU 설정 에러:", e)
+else:
     strategy = tf.distribute.get_strategy()
+    print("⚠️ GPU 없음, CPU 사용")
 # =======================
 # 1) 파일 다운로드
     mean_loss = tf.reduce_sum(per_tok) / (tf.reduce_sum(mask) + 1e-8)
     return tf.exp(mean_loss)
 with strategy.scope():
     model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
     dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
     # 학습
     history = model.fit(dist_dataset, epochs=1, steps_per_epoch=steps_per_epoch, verbose=1)
 model.save_weights("tf_model.weights.h5")
 print("✅ 모델 가중치 저장 완료!")