Spaces:

trioskosmos
/

LovecaSim

Sleeping

App Files Files Community

trioskosmos commited on Feb 3

Commit

a8f17ae

verified ·

1 Parent(s): 5a40dcb

Upload ai/training/benchmark_train.py with huggingface_hub

Browse files

Files changed (1) hide show

ai/training/benchmark_train.py +99 -0

ai/training/benchmark_train.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import os
+import sys
+import time
+import numpy as np
+import torch
+# Ensure project root is in path
+sys.path.append(os.getcwd())
+import torch.nn.functional as F
+import torch.optim as optim
+from ai.environments.rust_env_lite import RustEnvLite
+from ai.models.training_config import INPUT_SIZE, POLICY_SIZE
+from ai.training.train import AlphaNet
+def benchmark():
+    print("========================================================")
+    print(" LovecaSim AlphaZero Benchmark (Lite Rust Env)          ")
+    print("========================================================")
+    # Configuration
+    NUM_ENVS = int(os.getenv("BENCH_ENVS", "256"))
+    TOTAL_STEPS = int(os.getenv("BENCH_STEPS", "200"))
+    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f" [Bench] Device:  {DEVICE}")
+    print(f" [Bench] Envs:    {NUM_ENVS}")
+    print(f" [Bench] Steps:   {TOTAL_STEPS}")
+    print(f" [Bench] Obs Dim: {INPUT_SIZE}")
+    # 1. Initialize Simplified Environment
+    print(" [Bench] Initializing Rust Engine (Lite)...")
+    env = RustEnvLite(num_envs=NUM_ENVS)
+    obs = env.reset()
+    # 2. Initialize Model
+    print(" [Bench] Initializing AlphaNet...")
+    model = AlphaNet(policy_size=POLICY_SIZE).to(DEVICE)
+    optimizer = optim.Adam(model.parameters(), lr=1e-4)
+    obs_tensor = torch.zeros((NUM_ENVS, INPUT_SIZE), dtype=torch.float32).to(DEVICE)
+    obs_tensor.requires_grad = True  # Enable grad for stress testing
+    # 3. Benchmark Loop
+    print(" [Bench] Starting Training Loop...")
+    start_time = time.time()
+    total_samples = 0
+    for step in range(1, TOTAL_STEPS + 1):
+        # A. Sync Obs to GPU
+        with torch.no_grad():
+            obs_tensor.copy_(torch.from_numpy(obs))
+        # B. Inference
+        policy_logits, value = model(obs_tensor)
+        # C. Action Selection (Sample from logits)
+        # Gradient is detached for sampling
+        with torch.no_grad():
+            probs = F.softmax(policy_logits, dim=1)
+            actions = torch.multinomial(probs, 1).cpu().numpy().flatten().astype(np.int32)
+        # D. Environment Step
+        obs, rewards, dones, done_indices = env.step(actions)
+        # E. Dummy Training Step (Simulate backward pass stress)
+        if step % 5 == 0:
+            optimizer.zero_grad()
+            # Dummy target for benchmarking
+            p_loss = policy_logits.mean()
+            v_loss = value.mean()
+            loss = p_loss + v_loss
+            loss.backward()
+            optimizer.step()
+        total_samples += NUM_ENVS
+        if step % 50 == 0 or step == TOTAL_STEPS:
+            elapsed = time.time() - start_time
+            sps = total_samples / elapsed if elapsed > 0 else 0
+            print(f" [Bench] Step {step}/{TOTAL_STEPS} | SPS: {sps:.0f}")
+    end_time = time.time()
+    duration = end_time - start_time
+    final_sps = total_samples / duration
+    print("\n========================================================")
+    print(" [Result] Benchmark Completed!")
+    print(f" [Result] Total Time:    {duration:.2f}s")
+    print(f" [Result] Total Samples: {total_samples}")
+    print(f" [Result] Final SPS:     {final_sps:.2f}")
+    print("========================================================")
+if __name__ == "__main__":
+    benchmark()