Spaces:
Running
Running
File size: 3,384 Bytes
a8f17ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
import sys
import time
import numpy as np
import torch
# Ensure project root is in path
sys.path.append(os.getcwd())
import torch.nn.functional as F
import torch.optim as optim
from ai.environments.rust_env_lite import RustEnvLite
from ai.models.training_config import INPUT_SIZE, POLICY_SIZE
from ai.training.train import AlphaNet
def benchmark():
print("========================================================")
print(" LovecaSim AlphaZero Benchmark (Lite Rust Env) ")
print("========================================================")
# Configuration
NUM_ENVS = int(os.getenv("BENCH_ENVS", "256"))
TOTAL_STEPS = int(os.getenv("BENCH_STEPS", "200"))
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" [Bench] Device: {DEVICE}")
print(f" [Bench] Envs: {NUM_ENVS}")
print(f" [Bench] Steps: {TOTAL_STEPS}")
print(f" [Bench] Obs Dim: {INPUT_SIZE}")
# 1. Initialize Simplified Environment
print(" [Bench] Initializing Rust Engine (Lite)...")
env = RustEnvLite(num_envs=NUM_ENVS)
obs = env.reset()
# 2. Initialize Model
print(" [Bench] Initializing AlphaNet...")
model = AlphaNet(policy_size=POLICY_SIZE).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
obs_tensor = torch.zeros((NUM_ENVS, INPUT_SIZE), dtype=torch.float32).to(DEVICE)
obs_tensor.requires_grad = True # Enable grad for stress testing
# 3. Benchmark Loop
print(" [Bench] Starting Training Loop...")
start_time = time.time()
total_samples = 0
for step in range(1, TOTAL_STEPS + 1):
# A. Sync Obs to GPU
with torch.no_grad():
obs_tensor.copy_(torch.from_numpy(obs))
# B. Inference
policy_logits, value = model(obs_tensor)
# C. Action Selection (Sample from logits)
# Gradient is detached for sampling
with torch.no_grad():
probs = F.softmax(policy_logits, dim=1)
actions = torch.multinomial(probs, 1).cpu().numpy().flatten().astype(np.int32)
# D. Environment Step
obs, rewards, dones, done_indices = env.step(actions)
# E. Dummy Training Step (Simulate backward pass stress)
if step % 5 == 0:
optimizer.zero_grad()
# Dummy target for benchmarking
p_loss = policy_logits.mean()
v_loss = value.mean()
loss = p_loss + v_loss
loss.backward()
optimizer.step()
total_samples += NUM_ENVS
if step % 50 == 0 or step == TOTAL_STEPS:
elapsed = time.time() - start_time
sps = total_samples / elapsed if elapsed > 0 else 0
print(f" [Bench] Step {step}/{TOTAL_STEPS} | SPS: {sps:.0f}")
end_time = time.time()
duration = end_time - start_time
final_sps = total_samples / duration
print("\n========================================================")
print(" [Result] Benchmark Completed!")
print(f" [Result] Total Time: {duration:.2f}s")
print(f" [Result] Total Samples: {total_samples}")
print(f" [Result] Final SPS: {final_sps:.2f}")
print("========================================================")
if __name__ == "__main__":
benchmark()
|