# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "torch>=2.0.0",
#   "pytorch-lightning>=2.0.0",
#   "datasets>=2.10.0",
#   "transformers>=4.30.0",
#   "torchdiffeq>=0.2.3",
#   "POT>=0.9.0",
#   "einops>=0.6.0",
#   "numpy>=1.24.0",
#   "scipy>=1.10.0",
#   "rich",
#   "huggingface-hub",
#   "setuptools",
#   "ninja",
# ]
# ///
"""Environment smoke test for SEM V6 training on HuggingFace Jobs.

Verifies, in order: CUDA availability, download of the training repo,
presence of nvcc, an in-place build of the ChebyKAN CUDA extension,
import of the model class, and one GPU forward pass.  Progress is
reported via stdout prints; any unexpected failure raises and aborts.
"""
import os
import shutil
import subprocess
import sys
import time

import torch

t0 = time.time()

print("=" * 60)
print("SEM V6 HuggingFace Jobs - Environment Test")
print("=" * 60)
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    props = torch.cuda.get_device_properties(0)
    print(f"Memory: {props.total_memory / 1e9:.1f} GB")

# Download the training code from the Hub.  Import is deliberately late:
# huggingface_hub is only needed from this point on.
print("\nCloning training code...")
from huggingface_hub import snapshot_download

repo_path = snapshot_download(
    "icarus112/sem-v6-training",
    token=os.environ.get("HF_TOKEN"),
)
print(f"Repo: {repo_path}")

# Check for nvcc.  shutil.which is the stdlib PATH lookup — no subprocess
# needed, and it works even where a `which` binary is absent.
print(f"\nnvcc: {shutil.which('nvcc') or 'NOT FOUND'}")

# Build the ChebyKAN CUDA extension in place so it can be imported below.
chebykan_dir = os.path.join(repo_path, "ChebyKan_cuda_op")
if os.path.isdir(chebykan_dir):
    print("\nBuilding ChebyKAN CUDA extension...")
    try:
        result = subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            cwd=chebykan_dir,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired as exc:
        # A hung nvcc/ninja build should be reported, not crash the test
        # with a raw traceback.
        print(f"Build TIMED OUT after {exc.timeout:.0f}s")
    else:
        print("Build stdout (last 10 lines):")
        for line in result.stdout.strip().split("\n")[-10:]:
            print(f" {line}")
        if result.returncode != 0:
            print("Build stderr (last 10 lines):")
            for line in result.stderr.strip().split("\n")[-10:]:
                print(f" {line}")
            print(f"Build FAILED (rc={result.returncode})")
        else:
            print("Build OK!")

# Make the project sources and the freshly built extension importable.
sys.path.insert(0, os.path.join(repo_path, "src"))
sys.path.insert(0, chebykan_dir)

print("\nTesting imports...")
from sem_v6.sem_v6 import BioPlausibleCrystal

print("BioPlausibleCrystal imported!")

if torch.cuda.is_available():
    device = torch.device("cuda")
    # Hyperparameters mirror the intended training configuration —
    # TODO confirm against the training entry point.
    model = BioPlausibleCrystal(
        sdr_dim=1368,
        latent_dim=256,
        kan_degree=3,
        sparsity=0.05,
        embed_dim=512,
    ).to(device)
    result = model("Hello world", reward=1.0, mode="awake")
    print(f"Forward pass OK: tuple len={len(result)}")
    print(f"GPU mem: {torch.cuda.max_memory_allocated() / 1e6:.0f} MB")

elapsed = time.time() - t0
print(f"\nTotal time: {elapsed:.1f}s")
print("\n" + "=" * 60)
print("TEST PASSED - Ready for training!")
print("=" * 60)