sem-v6-training / scripts /test_env.py
icarus112's picture
Upload scripts/test_env.py with huggingface_hub
8b3e3d3 verified
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "torch>=2.0.0",
# "pytorch-lightning>=2.0.0",
# "datasets>=2.10.0",
# "transformers>=4.30.0",
# "torchdiffeq>=0.2.3",
# "POT>=0.9.0",
# "einops>=0.6.0",
# "numpy>=1.24.0",
# "scipy>=1.10.0",
# "rich",
# "huggingface-hub",
# "setuptools",
# "ninja",
# ]
# ///
import os, sys, subprocess, torch, time
# Start the wall-clock timer first; the elapsed time printed at the end of the
# script is measured from this point.
t0 = time.time()

banner_rule = "=" * 60
print(banner_rule)
print("SEM V6 HuggingFace Jobs - Environment Test")
print(banner_rule)

cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
# When CUDA is usable, report the name and total memory of device 0.
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    props = torch.cuda.get_device_properties(0)
    total_gb = props.total_memory / 1e9  # total_memory divided by 1e9 for the "GB" figure
    print(f"Memory: {total_gb:.1f} GB")
# Download a local snapshot of the training-code repo from the Hugging Face Hub.
print("\nCloning training code...")
from huggingface_hub import snapshot_download

hub_token = os.environ.get("HF_TOKEN")  # None when the variable is unset
repo_path = snapshot_download(repo_id="icarus112/sem-v6-training", token=hub_token)
print(f"Repo: {repo_path}")
# Report where nvcc lives on PATH, if anywhere.
# shutil.which is used instead of spawning the external `which` program, so the
# check also works on images that do not ship that binary (subprocess.run would
# raise FileNotFoundError there).
import shutil

nvcc_path = shutil.which("nvcc")  # None when nvcc is not on PATH
print(f"\nnvcc: {nvcc_path or 'NOT FOUND'}")
# Build the ChebyKAN CUDA extension in place when the downloaded snapshot
# contains its source directory.
# A non-zero exit code or a timeout is reported but does not abort the script,
# so the remaining checks still run.
chebykan_dir = os.path.join(repo_path, "ChebyKan_cuda_op")
if os.path.isdir(chebykan_dir):
    print("\nBuilding ChebyKAN CUDA extension...")
    build_timeout_s = 300
    try:
        result = subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            cwd=chebykan_dir,
            capture_output=True, text=True,
            timeout=build_timeout_s,
        )
    except subprocess.TimeoutExpired:
        # Treat a hung build like any other build failure instead of crashing
        # with a traceback.
        print(f"Build FAILED (timed out after {build_timeout_s}s)")
    else:
        # strip() drops trailing blank lines so the tail shows real output;
        # splitlines() yields [] for empty output (no spurious blank line).
        print("Build stdout (last 10 lines):")
        for line in result.stdout.strip().splitlines()[-10:]:
            print(f" {line}")
        if result.returncode != 0:
            print("Build stderr (last 10 lines):")
            for line in result.stderr.strip().splitlines()[-10:]:
                print(f" {line}")
            print(f"Build FAILED (rc={result.returncode})")
        else:
            print("Build OK!")
else:
    # Make the skip visible in the job log rather than passing over it silently.
    print(f"\nChebyKAN source dir not found, skipping build: {chebykan_dir}")
# Prepend the extension directory and the snapshot's src/ directory to the
# import path (extension directory first), then check that the model class
# can be imported.
src_dir = os.path.join(repo_path, "src")
sys.path[:0] = [chebykan_dir, src_dir]

print("\nTesting imports...")
from sem_v6.sem_v6 import BioPlausibleCrystal

print("BioPlausibleCrystal imported!")
# Run one forward pass on the GPU when CUDA is available, then print the total
# elapsed time and a final verdict.
# The verdict only claims success when the forward pass actually ran; without
# CUDA the script says so explicitly instead of reporting "Ready for training".
# The process exit status is left unchanged in both cases.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    device = torch.device("cuda")
    model = BioPlausibleCrystal(
        sdr_dim=1368, latent_dim=256, kan_degree=3,
        sparsity=0.05, embed_dim=512,
    ).to(device)
    # Named `outputs` so the build step's `result` is not shadowed.
    outputs = model("Hello world", reward=1.0, mode="awake")
    print(f"Forward pass OK: tuple len={len(outputs)}")
    print(f"GPU mem: {torch.cuda.max_memory_allocated() / 1e6:.0f} MB")
else:
    print("Forward pass SKIPPED: CUDA is not available")

elapsed = time.time() - t0
print(f"\nTotal time: {elapsed:.1f}s")
print("\n" + "=" * 60)
if cuda_ready:
    print("TEST PASSED - Ready for training!")
else:
    print("TEST INCOMPLETE - imports OK, but no GPU forward pass was run")
print("=" * 60)