| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
import os
import shutil
import subprocess
import sys
import time

import torch
|
|
# Environment smoke test: record the start time and print a banner,
# then report GPU availability before anything expensive runs.
t0 = time.time()
banner = "=" * 60
print(banner)
print("SEM V6 HuggingFace Jobs - Environment Test")
print(banner)

cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    props = torch.cuda.get_device_properties(0)
    print(f"Memory: {props.total_memory / 1e9:.1f} GB")
|
|
| |
# Fetch the training code from the HF Hub; the token comes from the job env.
print("\nCloning training code...")
from huggingface_hub import snapshot_download

hf_token = os.environ.get("HF_TOKEN")
repo_path = snapshot_download("icarus112/sem-v6-training", token=hf_token)
print(f"Repo: {repo_path}")
|
|
| |
# Locate nvcc with shutil.which (stdlib, cross-platform) instead of shelling
# out to `which`, which spawns a process, is POSIX-only, and whose return
# code was being ignored. Output is identical: path, or 'NOT FOUND'.
nvcc_path = shutil.which("nvcc")
print(f"\nnvcc: {nvcc_path or 'NOT FOUND'}")
|
|
| |
def _print_tail(label: str, text: str, n: int = 10) -> None:
    """Print the last *n* lines of *text* under a short label."""
    print(f"{label} (last {n} lines):")
    for line in text.strip().split("\n")[-n:]:
        print(f" {line}")


# Compile the ChebyKAN custom CUDA op in-place, if the repo ships it.
# NOTE: chebykan_dir is assigned unconditionally — later sys.path setup uses it.
chebykan_dir = os.path.join(repo_path, "ChebyKan_cuda_op")
if os.path.isdir(chebykan_dir):
    print("\nBuilding ChebyKAN CUDA extension...")
    try:
        result = subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            cwd=chebykan_dir,
            capture_output=True, text=True,
            timeout=300,  # nvcc builds can hang; cap at 5 minutes
        )
    except subprocess.TimeoutExpired as exc:
        # Previously an expired timeout raised uncaught and aborted the whole
        # environment test; report it as a build failure instead.
        print(f"Build FAILED: timed out after {exc.timeout}s")
    else:
        _print_tail("Build stdout", result.stdout)
        if result.returncode != 0:
            _print_tail("Build stderr", result.stderr)
            print(f"Build FAILED (rc={result.returncode})")
        else:
            print("Build OK!")
|
|
| |
# Make the downloaded sources importable: insert src/ first, then the
# extension dir, so the extension dir ends up at the front of sys.path
# (same final ordering as two consecutive insert(0, ...) calls).
for entry in (os.path.join(repo_path, "src"), chebykan_dir):
    sys.path.insert(0, entry)

# Smoke-test that the core model class can actually be imported.
print("\nTesting imports...")
from sem_v6.sem_v6 import BioPlausibleCrystal

print("BioPlausibleCrystal imported!")
|
|
if torch.cuda.is_available():
    # Build the model on the GPU and run a single awake-mode forward pass
    # to verify the full stack (weights, CUDA op, tokenization path) works.
    device = torch.device("cuda")
    model = BioPlausibleCrystal(
        sdr_dim=1368,
        latent_dim=256,
        kan_degree=3,
        sparsity=0.05,
        embed_dim=512,
    ).to(device)
    result = model("Hello world", reward=1.0, mode="awake")
    print(f"Forward pass OK: tuple len={len(result)}")
    print(f"GPU mem: {torch.cuda.max_memory_allocated() / 1e6:.0f} MB")
|
|
# Final summary: total wall-clock time and a pass banner.
print(f"\nTotal time: {time.time() - t0:.1f}s")
rule = "=" * 60
print("\n" + rule)
print("TEST PASSED - Ready for training!")
print(rule)
|
|