Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3
"""Generate sample text from Feather checkpoint to test SDR composition in output."""
import torch, os, sys
from pathlib import Path
# Make sibling modules (hydra, prepare) importable regardless of the CWD
# this script is launched from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Environment must be configured before the hydra modules are imported below:
# CUDA/WSL library paths plus Hydra feature flags (all disabled for a plain
# CPU sampling run).
# NOTE(review): changing LD_LIBRARY_PATH after interpreter start does not
# affect libraries already loaded into this process — presumably this is for
# child processes / lazy CUDA loads; generation itself runs on CPU.
_ENV_SETTINGS = {
    "LD_LIBRARY_PATH": "/usr/lib/wsl/lib:/usr/local/cuda/lib64",
    "CUDA_HOME": "/usr/local/cuda",
    "PATH": "/usr/local/cuda/bin:" + os.environ.get("PATH", ""),
    "HYDRA_USE_NEMOTRON": "0",
    "HYDRA_USE_FULL_BLEND": "0",
    "HYDRA_SAMPLED_SOFTMAX": "0",
    "HYDRA_SOFTCAP_CLAMP": "0",
}
os.environ.update(_ENV_SETTINGS)
| from hydra.config import PostSemClawConfig | |
| from hydra.model import PostSemClawModel | |
| from prepare import Tokenizer | |
# Latest training checkpoint; expected to hold the state dict plus the
# config dict it was trained with.
CKPT = Path.home() / ".cache" / "autoresearch" / "latest.pt"
print("[GEN] Loading checkpoint...")
# weights_only=False is needed because the checkpoint stores non-tensor
# objects (the config dict) — this unpickles arbitrary objects, so only
# load checkpoints from a trusted source.
ckpt = torch.load(CKPT, map_location="cpu", weights_only=False)
md = ckpt["model_state_dict"]
cfg = ckpt["config"]
# Rebuild the model configuration from the values saved in the checkpoint.
# n_heads is derived (d_model / headdim) rather than stored; the two loss
# knobs use .get() fallbacks so older checkpoints without them still load.
_conf_kwargs = dict(
    sequence_len=cfg["sequence_len"],
    vocab_size=cfg["vocab_size"],
    n_layer=cfg["n_layer"],
    d_model=cfg["d_model"],
    d_state=cfg["d_state"],
    headdim=cfg["headdim"],
    n_heads=cfg["d_model"] // cfg["headdim"],
    expand=cfg["expand"],
    engram_n_columns=cfg["engram_n_columns"],
    engram_key_dim=cfg["engram_key_dim"],
    engram_layer_idx=cfg["engram_layer_idx"],
    sdr_n_bits=cfg["sdr_n_bits"],
    sdr_target_active=cfg["sdr_target_active"],
    sdr_delta_rank=cfg["sdr_delta_rank"],
    sdr_som_warmup=cfg["sdr_som_warmup"],
    sdr_som_interval=cfg["sdr_som_interval"],
    htm_n_columns=cfg["htm_n_columns"],
    htm_cells_per_column=cfg["htm_cells_per_column"],
    label_smoothing=cfg.get("label_smoothing", 0.0),
    z_loss_weight=cfg.get("z_loss_weight", 0.0001),
)
conf = PostSemClawConfig(**_conf_kwargs)
print(f"[GEN] Building {cfg['n_layer']}L x {cfg['d_model']}D model (CPU)...")
model = PostSemClawModel(conf).eval()
# strict=False: tolerate key mismatches between the checkpoint and the
# freshly built model (e.g. feature-flagged submodules disabled above).
model.load_state_dict(md, strict=False)
# Parameter count in millions — quick sanity check against the config.
p = sum(p.numel() for p in model.parameters())/1e6
print(f"[GEN] Loaded {p:.1f}M params")
print("[GEN] Loading tokenizer...")
tok = Tokenizer.from_directory(Path.home() / ".cache/autoresearch/tokenizer")
# Fall back to token id 0 when the tokenizer defines no BOS token.
BOS = tok.get_bos_token_id() or 0
print(f"[GEN] Vocab={tok.get_vocab_size()}, BOS={BOS}")
# Sampling hyperparameters for the generation loop.
max_n = 64      # tokens generated per prompt
top_k = 40      # candidates kept at each sampling step
temp = 1.0      # softmax temperature (1.0 = raw logits)
device = "cpu"
# Short factual / open-ended prompts to eyeball output quality.
prompts = [
    "The capital of France is",
    "The theory of relativity states that",
    "In the beginning,",
]
# Autoregressive top-k sampling: for each prompt, repeatedly feed the last
# 100 tokens through the model and sample the next token from the top_k
# highest logits.
for prompt in prompts:
    # Seed the sequence with BOS followed by the encoded prompt.
    ids = torch.tensor([[BOS] + tok.encode(prompt)], device=device, dtype=torch.long)
    print(f"\n=== PROMPT: {prompt} ===")
    with torch.no_grad():
        for step in range(max_n):
            # BUG FIX: the previous code conditionally round-tripped ids
            # through bfloat16 when ids.dtype != torch.long — but ids are
            # always built as torch.long (above and in the cat below), so
            # the branch was dead, and had it ever run it would have
            # corrupted token ids beyond bf16's integer-exact range.
            # Token ids stay integer; the model handles weight dtypes
            # internally. Keep only the last 100 tokens as context.
            input_ids = ids[:, -100:]
            logits = model(input_ids, targets=None)[0, -1, :].float() / temp
            # Sample from the renormalized top_k candidate distribution.
            vals, idxs = logits.topk(top_k)
            probs = torch.softmax(vals, dim=-1)
            nid = idxs[torch.multinomial(probs, 1)].item()
            ids = torch.cat([ids, torch.tensor([[nid]], device=device, dtype=torch.long)], dim=1)
    out = tok.decode(ids[0].tolist())
    print(f"OUTPUT ({len(ids[0])} tokens): {out[:300]}")