#!/usr/bin/env python3
"""Generate sample text from a Feather checkpoint to test SDR composition in the output."""
import os
import sys
from pathlib import Path

import torch

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# CUDA paths (WSL layout) and Hydra feature flags must be set before the hydra imports.
os.environ["LD_LIBRARY_PATH"] = "/usr/lib/wsl/lib:/usr/local/cuda/lib64"
os.environ["CUDA_HOME"] = "/usr/local/cuda"
os.environ["PATH"] = "/usr/local/cuda/bin:" + os.environ.get("PATH", "")
os.environ["HYDRA_USE_NEMOTRON"] = "0"
os.environ["HYDRA_USE_FULL_BLEND"] = "0"
os.environ["HYDRA_SAMPLED_SOFTMAX"] = "0"
os.environ["HYDRA_SOFTCAP_CLAMP"] = "0"

from hydra.config import PostSemClawConfig
from hydra.model import PostSemClawModel
from prepare import Tokenizer

CKPT = Path.home() / ".cache" / "autoresearch" / "latest.pt"

print("[GEN] Loading checkpoint...")
ckpt = torch.load(CKPT, map_location="cpu", weights_only=False)
md = ckpt["model_state_dict"]
cfg = ckpt["config"]
conf = PostSemClawConfig(
    sequence_len=cfg["sequence_len"],
    vocab_size=cfg["vocab_size"],
    n_layer=cfg["n_layer"],
    d_model=cfg["d_model"],
    d_state=cfg["d_state"],
    headdim=cfg["headdim"],
    n_heads=cfg["d_model"] // cfg["headdim"],
    expand=cfg["expand"],
    engram_n_columns=cfg["engram_n_columns"],
    engram_key_dim=cfg["engram_key_dim"],
    engram_layer_idx=cfg["engram_layer_idx"],
    sdr_n_bits=cfg["sdr_n_bits"],
    sdr_target_active=cfg["sdr_target_active"],
    sdr_delta_rank=cfg["sdr_delta_rank"],
    sdr_som_warmup=cfg["sdr_som_warmup"],
    sdr_som_interval=cfg["sdr_som_interval"],
    htm_n_columns=cfg["htm_n_columns"],
    htm_cells_per_column=cfg["htm_cells_per_column"],
    label_smoothing=cfg.get("label_smoothing", 0.0),
    z_loss_weight=cfg.get("z_loss_weight", 0.0001),
)

print(f"[GEN] Building {cfg['n_layer']}L x {cfg['d_model']}D model (CPU)...")
model = PostSemClawModel(conf).eval()
model.load_state_dict(md, strict=False)
n_params = sum(p.numel() for p in model.parameters()) / 1e6
print(f"[GEN] Loaded {n_params:.1f}M params")

print("[GEN] Loading tokenizer...")
tok = Tokenizer.from_directory(Path.home() / ".cache/autoresearch/tokenizer")
BOS = tok.get_bos_token_id() or 0  # fall back to id 0 if the tokenizer has no BOS
print(f"[GEN] Vocab={tok.get_vocab_size()}, BOS={BOS}")

max_n = 64      # tokens to generate per prompt
top_k = 40      # sample from the 40 highest-probability tokens
temp = 1.0      # softmax temperature
device = "cpu"

prompts = [
    "The capital of France is",
    "The theory of relativity states that",
    "In the beginning,",
]

for prompt in prompts:
    ids = torch.tensor([[BOS] + tok.encode(prompt)], device=device, dtype=torch.long)
    print(f"\n=== PROMPT: {prompt} ===")
    with torch.no_grad():
        for _ in range(max_n):
            # Token IDs stay int64 for the embedding lookup; the model handles
            # the bf16 cast internally. (The previous round-trip through
            # bfloat16 and back to long was a no-op and has been removed.)
            # Keep only the last 100 tokens as context to bound the forward cost.
            input_ids = ids[:, -100:]
            # Temperature-scaled top-k sampling: renormalize over the k best logits.
            logits = model(input_ids, targets=None)[0, -1, :].float() / temp
            vals, idxs = logits.topk(top_k)
            probs = torch.softmax(vals, dim=-1)
            nid = idxs[torch.multinomial(probs, 1)].item()
            ids = torch.cat([ids, torch.tensor([[nid]], device=device, dtype=torch.long)], dim=1)
    out = tok.decode(ids[0].tolist())
    print(f"OUTPUT ({len(ids[0])} tokens): {out[:300]}")
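
# --- Optional: nucleus (top-p) sampling sketch --------------------------------
# The loop above uses fixed top-k sampling. If the SDR composition test turns
# out to be sensitive to the low-probability tail, nucleus (top-p) filtering is
# a common alternative. This is a minimal sketch, not part of the checkpointed
# model or the hydra API: the function name and the 0.9 threshold are
# illustrative choices, and it assumes the same 1-D float `logits` tensor
# produced inside the loop above.
def sample_top_p(logits: torch.Tensor, top_p: float = 0.9) -> int:
    """Sample a token id from the smallest set of tokens whose cumulative
    probability exceeds `top_p`."""
    probs = torch.softmax(logits, dim=-1)
    sorted_probs, sorted_idxs = probs.sort(descending=True)
    cum = sorted_probs.cumsum(dim=-1)
    # Keep tokens up to and including the first one that crosses the threshold
    # (cum is ascending, so searchsorted finds that crossing point).
    cutoff = int(torch.searchsorted(cum, torch.tensor(top_p)).item()) + 1
    kept = sorted_probs[:cutoff] / sorted_probs[:cutoff].sum()
    return sorted_idxs[torch.multinomial(kept, 1)].item()

# To try it, replace the top-k block in the loop with:
#     nid = sample_top_p(logits)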