# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "torch>=2.0.0",
#   "pytorch-lightning>=2.0.0",
#   "datasets>=2.10.0",
#   "transformers>=4.30.0",
#   "torchdiffeq>=0.2.3",
#   "POT>=0.9.0",
#   "einops>=0.6.0",
#   "numpy>=1.24.0",
#   "scipy>=1.10.0",
#   "rich",
#   "huggingface-hub",
#   "setuptools",
#   "ninja",
# ]
# ///
"""Environment smoke test for SEM V6 training on HuggingFace Jobs.

Verifies, in order: CUDA availability, download of the training repo,
presence of nvcc, an in-place build of the ChebyKAN CUDA extension,
import of the model class, and one GPU forward pass.  Progress is
reported via stdout prints; any unexpected failure raises and aborts.
"""
import os
import shutil
import subprocess
import sys
import time

import torch

t0 = time.time()

print("=" * 60)
print("SEM V6 HuggingFace Jobs - Environment Test")
print("=" * 60)
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    props = torch.cuda.get_device_properties(0)
    print(f"Memory: {props.total_memory / 1e9:.1f} GB")

# Download the training code from the Hub.  Import is deliberately late:
# huggingface_hub is only needed from this point on.
print("\nCloning training code...")
from huggingface_hub import snapshot_download

repo_path = snapshot_download(
    "icarus112/sem-v6-training",
    token=os.environ.get("HF_TOKEN"),
)
print(f"Repo: {repo_path}")

# Check for nvcc.  shutil.which is the stdlib PATH lookup — no subprocess
# needed, and it works even where a `which` binary is absent.
print(f"\nnvcc: {shutil.which('nvcc') or 'NOT FOUND'}")

# Build the ChebyKAN CUDA extension in place so it can be imported below.
chebykan_dir = os.path.join(repo_path, "ChebyKan_cuda_op")
if os.path.isdir(chebykan_dir):
    print("\nBuilding ChebyKAN CUDA extension...")
    try:
        result = subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            cwd=chebykan_dir,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired as exc:
        # A hung nvcc/ninja build should be reported, not crash the test
        # with a raw traceback.
        print(f"Build TIMED OUT after {exc.timeout:.0f}s")
    else:
        print("Build stdout (last 10 lines):")
        for line in result.stdout.strip().split("\n")[-10:]:
            print(f" {line}")
        if result.returncode != 0:
            print("Build stderr (last 10 lines):")
            for line in result.stderr.strip().split("\n")[-10:]:
                print(f" {line}")
            print(f"Build FAILED (rc={result.returncode})")
        else:
            print("Build OK!")

# Make the project sources and the freshly built extension importable.
sys.path.insert(0, os.path.join(repo_path, "src"))
sys.path.insert(0, chebykan_dir)

print("\nTesting imports...")
from sem_v6.sem_v6 import BioPlausibleCrystal

print("BioPlausibleCrystal imported!")

if torch.cuda.is_available():
    device = torch.device("cuda")
    # Hyperparameters mirror the intended training configuration —
    # TODO confirm against the training entry point.
    model = BioPlausibleCrystal(
        sdr_dim=1368,
        latent_dim=256,
        kan_degree=3,
        sparsity=0.05,
        embed_dim=512,
    ).to(device)
    result = model("Hello world", reward=1.0, mode="awake")
    print(f"Forward pass OK: tuple len={len(result)}")
    print(f"GPU mem: {torch.cuda.max_memory_allocated() / 1e6:.0f} MB")

elapsed = time.time() - t0
print(f"\nTotal time: {elapsed:.1f}s")
print("\n" + "=" * 60)
print("TEST PASSED - Ready for training!")
print("=" * 60)