# /// script
# requires-python = ">=3.10"
# dependencies = [
# "torch>=2.0.0",
# "pytorch-lightning>=2.0.0",
# "datasets>=2.10.0",
# "transformers>=4.30.0",
# "torchdiffeq>=0.2.3",
# "POT>=0.9.0",
# "einops>=0.6.0",
# "numpy>=1.24.0",
# "scipy>=1.10.0",
# "rich",
# "huggingface-hub",
# "setuptools",
# "ninja",
# ]
# ///
import os
import shutil
import subprocess
import sys
import time

import torch
# Environment smoke test for the SEM V6 training job: report the GPU, download
# the training repo, try to build the ChebyKAN CUDA extension, import the model
# and (when CUDA is available) run one forward pass.

BANNER = "=" * 60
REPO_ID = "icarus112/sem-v6-training"
BUILD_TIMEOUT_S = 300  # upper bound for compiling the CUDA extension
LOG_TAIL_LINES = 10  # trailing lines of build output echoed to the log


def tail_lines(text, count=LOG_TAIL_LINES):
    """Return the last *count* lines of *text* as a list of strings.

    Leading/trailing whitespace of the whole text is stripped first, so empty
    or whitespace-only output yields ``[]`` rather than a single blank line.
    A non-positive *count* also yields ``[]``.
    """
    if count <= 0:
        # Guard: a slice of [-0:] would return every line, not none.
        return []
    return text.strip().splitlines()[-count:]


def report_device():
    """Print CUDA availability and, if present, GPU 0's name and memory.

    Returns:
        True when ``torch.cuda.is_available()`` reports a usable device.
    """
    has_cuda = torch.cuda.is_available()
    print(f"CUDA available: {has_cuda}")
    if has_cuda:
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        props = torch.cuda.get_device_properties(0)
        print(f"Memory: {props.total_memory / 1e9:.1f} GB")
    return has_cuda


def download_repo():
    """Download a snapshot of the training repo and return its local path.

    The token is read from the ``HF_TOKEN`` environment variable; when it is
    unset ``None`` is passed through to ``snapshot_download``.
    """
    print("\nCloning training code...")
    # Imported lazily so the GPU report above is printed even if this
    # dependency turns out to be missing.
    from huggingface_hub import snapshot_download

    repo_path = snapshot_download(REPO_ID, token=os.environ.get("HF_TOKEN"))
    print(f"Repo: {repo_path}")
    return repo_path


def build_chebykan(chebykan_dir):
    """Run ``setup.py build_ext --inplace`` inside *chebykan_dir*.

    The tail of the build output is echoed to stdout. A timeout is reported
    the same way as a non-zero exit status instead of aborting the script.

    Returns:
        True if the build exited with status 0, False otherwise.
    """
    print("\nBuilding ChebyKAN CUDA extension...")
    try:
        build = subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            cwd=chebykan_dir,
            capture_output=True,
            text=True,
            timeout=BUILD_TIMEOUT_S,
        )
    except subprocess.TimeoutExpired:
        print(f"Build FAILED (timed out after {BUILD_TIMEOUT_S}s)")
        return False

    print(f"Build stdout (last {LOG_TAIL_LINES} lines):")
    for line in tail_lines(build.stdout):
        print(f" {line}")

    if build.returncode != 0:
        print(f"Build stderr (last {LOG_TAIL_LINES} lines):")
        for line in tail_lines(build.stderr):
            print(f" {line}")
        print(f"Build FAILED (rc={build.returncode})")
        return False

    print("Build OK!")
    return True


def smoke_test_forward(model_cls):
    """Instantiate *model_cls* on the CUDA device and run one forward call.

    The constructor arguments and the call arguments are the fixed values
    this smoke test has always used; their meaning is defined by the model
    class, which lives in the downloaded repo.
    """
    device = torch.device("cuda")
    model = model_cls(
        sdr_dim=1368,
        latent_dim=256,
        kan_degree=3,
        sparsity=0.05,
        embed_dim=512,
    ).to(device)
    output = model("Hello world", reward=1.0, mode="awake")
    # NOTE(review): the message assumes the model returns a tuple; only
    # len() support is actually required here.
    print(f"Forward pass OK: tuple len={len(output)}")
    print(f"GPU mem: {torch.cuda.max_memory_allocated() / 1e6:.0f} MB")


def main():
    """Run the full environment check and print a summary banner.

    Import errors and download errors propagate (the job fails). A failed
    extension build or a missing GPU does not abort the run, but is listed
    in the final summary so the verdict is not misleading.
    """
    started = time.perf_counter()
    caveats = []

    print(BANNER)
    print("SEM V6 HuggingFace Jobs - Environment Test")
    print(BANNER)

    has_cuda = report_device()
    repo_path = download_repo()

    # shutil.which avoids depending on an external `which` binary, which is
    # absent from some minimal container images.
    nvcc_path = shutil.which("nvcc")
    print(f"\nnvcc: {nvcc_path or 'NOT FOUND'}")

    chebykan_dir = os.path.join(repo_path, "ChebyKan_cuda_op")
    has_extension_dir = os.path.isdir(chebykan_dir)
    if has_extension_dir and not build_chebykan(chebykan_dir):
        caveats.append("ChebyKAN CUDA extension build failed")

    # Make the repo's packages importable; the extension directory is
    # inserted last so it takes precedence over src, as before.
    sys.path.insert(0, os.path.join(repo_path, "src"))
    if has_extension_dir:
        sys.path.insert(0, chebykan_dir)

    print("\nTesting imports...")
    # Must come after the sys.path setup above.
    from sem_v6.sem_v6 import BioPlausibleCrystal

    print("BioPlausibleCrystal imported!")

    if has_cuda:
        smoke_test_forward(BioPlausibleCrystal)
    else:
        caveats.append("CUDA not available, forward pass skipped")

    elapsed = time.perf_counter() - started
    print(f"\nTotal time: {elapsed:.1f}s")
    print("\n" + BANNER)
    if caveats:
        print("TEST PASSED WITH WARNINGS - review before training:")
        for caveat in caveats:
            print(f" - {caveat}")
    else:
        print("TEST PASSED - Ready for training!")
    print(BANNER)


if __name__ == "__main__":
    main()