trioskosmos committed on
Commit
a8f17ae
·
verified ·
1 Parent(s): 5a40dcb

Upload ai/training/benchmark_train.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. ai/training/benchmark_train.py +99 -0
ai/training/benchmark_train.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+
5
+ import numpy as np
6
+ import torch
7
+
8
+ # Ensure project root is in path
9
+ sys.path.append(os.getcwd())
10
+
11
+ import torch.nn.functional as F
12
+ import torch.optim as optim
13
+
14
+ from ai.environments.rust_env_lite import RustEnvLite
15
+ from ai.models.training_config import INPUT_SIZE, POLICY_SIZE
16
+ from ai.training.train import AlphaNet
17
+
18
+
19
def benchmark():
    """Measure samples-per-second of an inference/step/train loop.

    Configuration is read from environment variables:

    * ``BENCH_ENVS``  -- number of parallel environments (default ``256``).
    * ``BENCH_STEPS`` -- number of loop iterations to run (default ``200``).

    Each iteration copies the current observations into a preallocated
    tensor, runs the model forward, samples one action per environment from
    the softmax of the policy logits, and steps the environment. Every fifth
    iteration additionally runs a dummy backward pass and optimizer step.
    Progress and the final throughput are printed to stdout; nothing is
    returned.

    Raises:
        ValueError: if ``BENCH_ENVS`` or ``BENCH_STEPS`` is not an integer.
    """
    print("========================================================")
    print(" LovecaSim AlphaZero Benchmark (Lite Rust Env) ")
    print("========================================================")

    # Configuration
    NUM_ENVS = int(os.getenv("BENCH_ENVS", "256"))
    TOTAL_STEPS = int(os.getenv("BENCH_STEPS", "200"))
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f" [Bench] Device: {DEVICE}")
    print(f" [Bench] Envs: {NUM_ENVS}")
    print(f" [Bench] Steps: {TOTAL_STEPS}")
    print(f" [Bench] Obs Dim: {INPUT_SIZE}")

    # 1. Initialize Simplified Environment
    print(" [Bench] Initializing Rust Engine (Lite)...")
    env = RustEnvLite(num_envs=NUM_ENVS)
    obs = env.reset()

    # 2. Initialize Model
    print(" [Bench] Initializing AlphaNet...")
    model = AlphaNet(policy_size=POLICY_SIZE).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Preallocated device buffer that is refilled in place every step.
    obs_tensor = torch.zeros((NUM_ENVS, INPUT_SIZE), dtype=torch.float32).to(DEVICE)
    obs_tensor.requires_grad = True  # Enable grad for stress testing

    on_cuda = DEVICE.type == "cuda"

    # 3. Benchmark Loop
    print(" [Bench] Starting Training Loop...")
    if on_cuda:
        # CUDA work is queued asynchronously; drain setup work so it is not
        # billed to the timed region.
        torch.cuda.synchronize()
    # perf_counter is monotonic and high resolution, unlike wall-clock time().
    start_time = time.perf_counter()
    total_samples = 0

    for step in range(1, TOTAL_STEPS + 1):
        # A. Sync Obs to GPU
        # no_grad is required for an in-place write to a leaf that has
        # requires_grad=True.
        with torch.no_grad():
            obs_tensor.copy_(torch.from_numpy(obs))

        # B. Inference
        policy_logits, value = model(obs_tensor)

        # C. Action Selection (Sample from logits)
        # Gradient is detached for sampling
        with torch.no_grad():
            probs = F.softmax(policy_logits, dim=1)
            actions = torch.multinomial(probs, 1).cpu().numpy().flatten().astype(np.int32)

        # D. Environment Step
        # Only the next observations are used by the benchmark.
        obs, _rewards, _dones, _done_indices = env.step(actions)

        # E. Dummy Training Step (Simulate backward pass stress)
        if step % 5 == 0:
            optimizer.zero_grad()
            # Dummy target for benchmarking
            p_loss = policy_logits.mean()
            v_loss = value.mean()
            loss = p_loss + v_loss
            loss.backward()
            optimizer.step()

        total_samples += NUM_ENVS

        if step % 50 == 0 or step == TOTAL_STEPS:
            elapsed = time.perf_counter() - start_time
            sps = total_samples / elapsed if elapsed > 0 else 0
            print(f" [Bench] Step {step}/{TOTAL_STEPS} | SPS: {sps:.0f}")

    if on_cuda:
        # Wait for any still-queued kernels (e.g. the final optimizer step)
        # before stopping the clock.
        torch.cuda.synchronize()
    end_time = time.perf_counter()
    duration = end_time - start_time
    # Guard like the in-loop SPS: a zero-length run must not divide by zero.
    final_sps = total_samples / duration if duration > 0 else 0

    print("\n========================================================")
    print(" [Result] Benchmark Completed!")
    print(f" [Result] Total Time: {duration:.2f}s")
    print(f" [Result] Total Samples: {total_samples}")
    print(f" [Result] Final SPS: {final_sps:.2f}")
    print("========================================================")
96
+
97
+
98
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    benchmark()