TuringsSolutions commited on
Commit
09b3712
·
verified ·
1 Parent(s): c883619

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +385 -294
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py — CHR Compressed-Only Learning via Trading Card PNG (Investor Demo)
2
- # End-goal (Level 1): dataset -> compressed codes -> 2D "trading card" image
3
  # Training reads ONLY the trading card PNG pixels (no codes.bin used in training)
4
 
5
  import os
@@ -120,22 +120,18 @@ In the beginning, people stored knowledge in libraries, then in databases, and n
120
  Compression isn’t just saving space — it’s choosing what matters.
121
  A constellation is a pattern you can navigate.
122
  Entropy is a measure of surprise, and learning is surprise turning into structure.
123
-
124
  A system that learns from compressed data never needs the original.
125
  It doesn’t memorize pixels; it memorizes geometry.
126
  It doesn’t hoard text; it extracts signals.
127
  The question isn’t “Can it compress?” but “Can it learn after compressing?”
128
-
129
  Investors love seeing systems move.
130
  They love curves that fall.
131
  They love maps that cluster.
132
  They love a demo that feels alive.
133
-
134
  This demo builds a codec from your dataset,
135
  then trains a model exclusively on the codec’s trading card.
136
  No raw text is used during training.
137
  Only the trading card exists.
138
-
139
  We call the clusters constellations.
140
  We call the structure harvestable.
141
  We call the drop in entropy visible proof.
@@ -267,9 +263,9 @@ def save_codes_and_codec(code_bytes: bytes, codec: Dict, out_dir: str) -> Tuple[
267
  return bin_path, meta_path
268
 
269
  # -----------------------------
270
- # Trading Card layer (THE NEW PIECE)
271
  # -----------------------------
272
- CARD_MAGIC = b"TCAR" # Trading Card magic
273
  CARD_VER = 1
274
 
275
  def _sha256_hex(b: bytes) -> str:
@@ -279,13 +275,6 @@ def _crc32_u32(b: bytes) -> int:
279
  return zlib.crc32(b) & 0xFFFFFFFF
280
 
281
  def pack_trading_card_payload(code_bytes: bytes, codec: Dict, title: str = "CHR Trading Card") -> bytes:
282
- """
283
- A self-contained binary payload that lives inside the card's pixels.
284
-
285
- Layout:
286
- magic(4) | ver(u32) | header_len(u32) | header_json | code_len(u32) | code_bytes
287
- Header contains checksums for verification.
288
- """
289
  header = {
290
  "title": title,
291
  "codec": {
@@ -305,7 +294,6 @@ def pack_trading_card_payload(code_bytes: bytes, codec: Dict, title: str = "CHR
305
  "sha256": _sha256_hex(code_bytes),
306
  }
307
  header_json = json.dumps(header, ensure_ascii=False).encode("utf-8")
308
-
309
  blob = bytearray()
310
  blob += CARD_MAGIC
311
  blob += struct.pack("<I", CARD_VER)
@@ -316,9 +304,6 @@ def pack_trading_card_payload(code_bytes: bytes, codec: Dict, title: str = "CHR
316
  return bytes(blob)
317
 
318
  def unpack_trading_card_payload(payload: bytes) -> Tuple[Dict, bytes]:
319
- """
320
- Returns (header_dict, code_bytes) after validating structure.
321
- """
322
  if len(payload) < 16:
323
  raise ValueError("Card payload too small.")
324
  if payload[:4] != CARD_MAGIC:
@@ -338,45 +323,126 @@ def unpack_trading_card_payload(payload: bytes) -> Tuple[Dict, bytes]:
338
  raise ValueError("Card payload truncated.")
339
  return header, code_bytes
340
 
341
def bytes_to_data_slab_image(payload: bytes, slab_w: int = 256) -> np.ndarray:
    """
    Lay payload bytes out as a 2D grayscale slab (uint8).

    The byte stream is wrapped into rows of width ``slab_w``; the final row is
    zero-padded so the result is always a full ``h x slab_w`` rectangle.
    """
    data = np.frombuffer(payload, dtype=np.uint8)
    width = int(slab_w)
    height = int(math.ceil(len(data) / width))
    shortfall = height * width - len(data)
    # Pad only when the last row is partial; an exact fit keeps the buffer as-is.
    if shortfall > 0:
        data = np.concatenate([data, np.zeros(shortfall, dtype=np.uint8)], axis=0)
    return data.reshape(height, width)
354
-
355
def data_slab_image_to_bytes(slab: np.ndarray, orig_len: int) -> bytes:
    """Inverse of bytes_to_data_slab_image: flatten the slab and drop row padding."""
    raw = slab.astype(np.uint8).ravel()[:orig_len]
    return bytes(raw)
 
358
 
359
- def make_holo_front(U: np.ndarray, K: int, W: int, H: int, seed: int = 0) -> np.ndarray:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  """
361
- Create a holographic-looking RGB background from anchors U.
362
- Deterministic and fast. This is "sizzle"; it doesn't contain the payload.
363
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  rng = np.random.RandomState(int(seed) + 123)
365
- # pick a few anchor directions and random frequencies
366
  d = U.shape[1]
367
  n = min(K, 16)
368
  idx = rng.choice(K, size=n, replace=K < n)
369
- V = U[idx] # [n, d]
370
- # Create a coordinate grid
371
  yy, xx = np.mgrid[0:H, 0:W].astype(np.float32)
372
  xx = (xx / max(1, W-1) - 0.5) * 2.0
373
  yy = (yy / max(1, H-1) - 0.5) * 2.0
374
-
375
- # derive frequencies and phases from U
376
  freqs = rng.uniform(2.0, 10.0, size=n).astype(np.float32)
377
  phases = rng.uniform(0, 2*np.pi, size=n).astype(np.float32)
378
 
379
- # 3 channels from different mixtures
380
  out = np.zeros((H, W, 3), dtype=np.float32)
381
  for c in range(3):
382
  acc = np.zeros((H, W), dtype=np.float32)
@@ -384,85 +450,57 @@ def make_holo_front(U: np.ndarray, K: int, W: int, H: int, seed: int = 0) -> np.
384
  a = float(V[i, (c*7) % d])
385
  b = float(V[i, (c*11 + 3) % d])
386
  acc += np.cos(freqs[i] * (a*xx + b*yy) + phases[i])
387
- # normalize 0..1
388
  acc = (acc - acc.min()) / (acc.max() - acc.min() + 1e-9)
389
  out[..., c] = acc
390
 
391
- # add a subtle radial vignette
392
  rr = np.sqrt(xx*xx + yy*yy)
393
  vignette = np.clip(1.1 - 0.35*rr, 0.6, 1.1)
394
  out *= vignette[..., None]
395
  out = np.clip(out, 0.0, 1.0)
396
  return (out * 255.0).astype(np.uint8)
397
 
398
- def compose_trading_card(front_rgb: np.ndarray, slab_gray: np.ndarray, title: str, subtitle: str) -> np.ndarray:
399
  """
400
- Make a single card image:
401
- - top: holo front with title/subtitle overlay
402
- - bottom: data slab grid (this is where bytes live)
403
- Output: RGB uint8 image.
404
  """
405
- Hf, Wf, _ = front_rgb.shape
406
- slab_h, slab_w = slab_gray.shape
407
-
408
- # Make slab into RGB
409
- slab_rgb = np.stack([slab_gray]*3, axis=-1)
410
-
411
- # Add a separator
412
- sep = np.full((8, Wf, 3), 16, dtype=np.uint8)
413
-
414
- # Resize slab to match card width (nearest)
415
- if slab_w != Wf:
416
- # simple nearest resize
417
- slab_img = Image.fromarray(slab_gray, mode="L")
418
- slab_img = slab_img.resize((Wf, slab_h), resample=Image.NEAREST)
419
- slab_gray2 = np.array(slab_img, dtype=np.uint8)
420
- slab_rgb = np.stack([slab_gray2]*3, axis=-1)
421
-
422
- card = np.concatenate([front_rgb, sep, slab_rgb], axis=0)
423
-
424
- # Overlay text on front using PIL (fast and dependency-light)
425
  try:
426
- pil = Image.fromarray(card, mode="RGB")
427
  from PIL import ImageDraw, ImageFont
428
  draw = ImageDraw.Draw(pil)
429
- # default font
430
- font1 = ImageFont.load_default()
431
- font2 = ImageFont.load_default()
432
 
433
- # Place title/subtitle
434
- draw.rectangle([0, 0, Wf, 26], fill=(0, 0, 0))
435
- draw.text((10, 6), title, fill=(255, 255, 255), font=font1)
436
- draw.text((10, 34), subtitle, fill=(255, 255, 255), font=font2)
437
 
438
- # Add a "foil" frame
439
- draw.rectangle([4, 4, Wf-5, Hf-5], outline=(220, 220, 255), width=2)
440
 
441
- card = np.array(pil, dtype=np.uint8)
442
  except Exception:
443
- pass
444
-
445
- return card
446
 
447
def save_png(arr: np.ndarray, path: str):
    """Write an image array to *path* as PNG (lossless, so embedded payload bytes survive)."""
    Image.fromarray(arr).save(path, format="PNG")
449
 
450
def load_png(path: str) -> np.ndarray:
    """Load *path* and return its pixels as an RGB uint8 array (alpha/palette collapsed by convert)."""
    return np.array(Image.open(path).convert("RGB"), dtype=np.uint8)
452
 
453
def extract_payload_from_card(card_rgb: np.ndarray, slab_top: int, slab_w: int, payload_len: int) -> bytes:
    """
    Read the embedded payload back out of a card image's data-slab region.

    The slab occupies every row from ``slab_top`` down; its bytes were stored as
    grayscale replicated across R/G/B, so reading the red channel alone recovers them.
    NOTE(review): ``slab_w`` is accepted but never used — the code assumes the slab
    is exactly as wide as the card; confirm callers always pass the card width.
    """
    slab_rgb = card_rgb[slab_top:, :, :]
    slab_gray = slab_rgb[..., 0].astype(np.uint8)  # stored grayscale replicated in RGB
    # slab_gray already width==card width, we assume slab_w==card width
    slab = slab_gray
    return data_slab_image_to_bytes(slab, payload_len)
463
-
464
  # -----------------------------
465
- # Visuals (existing + card-specific)
466
  # -----------------------------
467
  def plot_entropy(Hg, Hs, out_path):
468
  plt.figure(figsize=(6,4))
@@ -532,10 +570,6 @@ def plot_before_after_tracks(before_bytes: List[int], after_bytes: List[int], ou
532
  plt.close()
533
 
534
  def make_card_tilt_gif(card_rgb: np.ndarray, out_path: str, frames: int = 24, fps: int = 12):
535
- """
536
- Cheap holo tilt effect: shift color channels + brightness gradient over the front region.
537
- This is pure sizzle and very fast.
538
- """
539
  H, W, _ = card_rgb.shape
540
  frames = int(max(8, min(frames, 48)))
541
  fps = int(max(6, min(fps, 24)))
@@ -547,13 +581,10 @@ def make_card_tilt_gif(card_rgb: np.ndarray, out_path: str, frames: int = 24, fp
547
  dy = int(2 + 3*np.cos(a))
548
 
549
  img = card_rgb.copy().astype(np.int16)
550
-
551
- # apply gentle "tilt" to the top half (front)
552
- front_h = int(H * 0.45)
553
  yy, xx = np.mgrid[0:front_h, 0:W]
554
  grad = (0.85 + 0.15*np.sin(a + (xx / max(1, W-1))*2*np.pi)).astype(np.float32)
555
 
556
- # channel shift
557
  r = np.roll(img[:front_h, :, 0], shift=dx, axis=1)
558
  g = np.roll(img[:front_h, :, 1], shift=dy, axis=0)
559
  b = img[:front_h, :, 2]
@@ -568,28 +599,25 @@ def make_card_tilt_gif(card_rgb: np.ndarray, out_path: str, frames: int = 24, fp
568
  imageio.mimsave(out_path, imgs, fps=fps)
569
 
570
  # -----------------------------
571
- # Training: byte-model reads ONLY the trading card PNG pixels
572
  # -----------------------------
573
  import torch
574
  import torch.nn as nn
575
  from torch.utils.data import Dataset, DataLoader
576
 
577
- class CardByteDataset(Dataset):
578
  """
579
- Produces next-byte prediction windows from the trading card's payload bytes.
580
- Importantly, it reads from the CARD PNG (pixels) every time.
581
  """
582
- def __init__(self, card_png_path: str, payload_len: int, slab_top: int, block_size: int = 128):
583
  self.card_png_path = card_png_path
584
- self.payload_len = int(payload_len)
585
- self.slab_top = int(slab_top)
586
  self.block_size = int(block_size)
587
 
588
- card = load_png(card_png_path)
589
- slab_rgb = card[self.slab_top:, :, :]
590
- slab_gray = slab_rgb[..., 0].astype(np.uint8)
591
- flat = slab_gray.ravel()
592
- self.bytes = torch.tensor(list(flat[:self.payload_len]), dtype=torch.long)
593
 
594
  def __len__(self):
595
  return max(0, len(self.bytes) - self.block_size - 1)
@@ -636,21 +664,19 @@ def sample_bytes(model, start: List[int], steps: int, device: str = "cpu", tempe
636
  seq.append(nxt)
637
  return seq
638
 
639
- def train_on_card_png(card_png_path: str,
640
- payload_len: int,
641
- slab_top: int,
642
- steps: int = 250,
643
- batch_size: int = 32,
644
- block_size: int = 128,
645
- lr: float = 5e-4,
646
- device: str = "cpu",
647
- log_every: int = 25):
648
- ds = CardByteDataset(card_png_path, payload_len=payload_len, slab_top=slab_top, block_size=block_size)
649
  n_windows = len(ds)
650
  if n_windows <= 0:
651
- raise RuntimeError(f"Card payload too small for block_size={block_size}. Reduce block_size or increase data.")
652
 
653
- # avoid drop_last if small
654
  drop_last = n_windows >= batch_size
655
  dl = DataLoader(ds, batch_size=batch_size, shuffle=True, drop_last=drop_last)
656
  it = iter(dl)
@@ -701,10 +727,16 @@ STATE = {
701
  "codec_path": None,
702
  "codec": None,
703
 
704
- "card_png_path": None,
705
- "card_payload_len": None,
706
- "card_slab_top": None,
707
- "card_header": None,
 
 
 
 
 
 
708
 
709
  "model": None,
710
  }
@@ -750,20 +782,16 @@ def ingest_file(file_obj, units_mode: str):
750
  except Exception as e:
751
  return f"Error ingesting file: {e}"
752
 
753
- def compress_and_make_card(K, iters, beta, slab_bins, tau, seed, radial_bins,
754
- card_width, front_height, title_text):
755
  """
756
- 1) CHR compress
757
- 2) build codes.bin + codec.json
758
- 3) build trading card PNG that embeds a full self-contained payload
759
- 4) verify by extracting payload back from the PNG
760
  """
761
  try:
762
  units = STATE.get("units")
763
  if not units:
764
  return "No units loaded. Upload or load demo corpus.", None, None, None, None, None, None
765
 
766
- # --- CHR compression ---
767
  Z, backend = embed_texts(units, prefer_sentence_transformer=True)
768
  U, p, Hg, Hs = chr_optimize(
769
  Z, K=int(K), iters=int(iters), beta=float(beta),
@@ -775,11 +803,10 @@ def compress_and_make_card(K, iters, beta, slab_bins, tau, seed, radial_bins,
775
 
776
  edges = make_radial_bins(radials, B=int(radial_bins))
777
  bins_q = np.array([quantize_radial(float(radials[i]), edges) for i in range(len(units))], dtype=np.int32)
778
-
779
  code_bytes = pack_codes_to_bytes(labels, bins_q)
780
 
781
- # --- Save codes.bin + codec.json (for audit/download only) ---
782
  out_dir = tempfile.mkdtemp()
 
783
  codec = {
784
  "backend": backend,
785
  "K": int(K),
@@ -797,113 +824,191 @@ def compress_and_make_card(K, iters, beta, slab_bins, tau, seed, radial_bins,
797
  }
798
  bin_path, codec_path = save_codes_and_codec(code_bytes, codec, out_dir)
799
 
800
- # --- Build trading card payload ---
801
- payload = pack_trading_card_payload(code_bytes=code_bytes, codec=codec, title=str(title_text).strip()[:120] or "CHR Trading Card")
802
- payload_len = len(payload)
803
-
804
- # --- Render data slab and holo front ---
805
- card_w = int(card_width)
806
- front_h = int(front_height)
807
- slab = bytes_to_data_slab_image(payload, slab_w=card_w) # grayscale slab holding payload
808
- slab_h = slab.shape[0]
809
- # front with same width
810
- front = make_holo_front(np.array(U, dtype=np.float32), K=int(K), W=card_w, H=front_h, seed=int(seed))
811
-
812
- mhep = compute_mhep(Hg, Hs, K=int(K), bins=int(slab_bins))
813
-
814
- subtitle = f"Units={len(units)} K={int(K)} Bytes={payload_len} CRC32={_crc32_u32(code_bytes):08x}"
815
- card_rgb = compose_trading_card(front_rgb=front, slab_gray=slab, title=str(title_text).strip() or "CHR Trading Card", subtitle=subtitle)
816
-
817
- # --- Save PNG ---
818
- card_png_path = os.path.join(out_dir, "trading_card.png")
819
- save_png(card_rgb, card_png_path)
820
-
821
- # --- Determine slab top for extraction ---
822
- sep_h = 8
823
- slab_top = front_h + sep_h
824
-
825
- # --- Verify by extracting payload back from PNG ---
826
- card_loaded = load_png(card_png_path)
827
- extracted = extract_payload_from_card(card_loaded, slab_top=slab_top, slab_w=card_w, payload_len=payload_len)
828
- header2, code2 = unpack_trading_card_payload(extracted)
829
-
830
- ok_crc = (_crc32_u32(code2) == int(header2["crc32"]))
831
- ok_sha = (_sha256_hex(code2) == str(header2["sha256"]))
832
- verified = (ok_crc and ok_sha and len(code2) == len(code_bytes))
833
-
834
- # Save extra visuals
835
- ent_plot = os.path.join(out_dir, "entropy.png")
836
- map_plot = os.path.join(out_dir, "map.png")
837
- plot_entropy(Hg, Hs, ent_plot)
838
- plot_constellation_map(Z, U, labels, map_plot)
839
-
840
- tilt_gif = os.path.join(out_dir, "card_tilt.gif")
841
- # small tilt gif (fast)
842
- make_card_tilt_gif(card_rgb, tilt_gif, frames=18, fps=12)
843
 
844
  STATE.update({
845
  "Z": Z, "U": U, "labels": labels, "bins": bins_q,
846
  "bin_path": bin_path, "codec_path": codec_path, "codec": codec,
847
- "card_png_path": card_png_path,
848
- "card_payload_len": payload_len,
849
- "card_slab_top": slab_top,
850
- "card_header": header2,
 
851
  "model": None
852
  })
853
 
 
 
 
 
 
 
 
854
  report = (
855
- f"## Trading Card Generated\n"
856
  f"- **Embedding backend:** `{backend}`\n"
857
  f"- **Units:** **{len(units)}**\n"
858
  f"- **Constellations (K):** **{int(K)}**\n"
859
  f"- **Radial bins:** **{int(radial_bins)}**\n"
860
- f"- **Card width:** **{card_w}px**\n"
861
- f"- **Payload bytes inside card:** **{payload_len}**\n"
862
- f"- **Code bytes (constellation+radial):** **{len(code_bytes)}**\n"
863
  f"- **MHEP score:** **{mhep:.1f}%**\n"
864
- f"\n### Integrity\n"
865
- f"- CRC32 match: **{str(ok_crc)}**\n"
866
- f"- SHA256 match: **{str(ok_sha)}**\n"
867
- f"- **Verified:** {'✅ YES' if verified else '❌ NO'}\n"
868
- f"\n### Investor-proof constraint\n"
869
- f"Training can now read **only** the **PNG trading card pixels**."
870
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
871
 
872
- header_json = json.dumps(header2, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
873
 
874
- return report, ent_plot, map_plot, card_png_path, tilt_gif, bin_path, codec_path, header_json
 
 
 
 
 
 
 
 
 
 
875
  except Exception as e:
876
- return f"Error: {e}\n\n{traceback.format_exc()}", None, None, None, None, None, None, None
877
 
878
- def train_from_card(train_steps, batch_size, block_size, lr, log_every,
879
- temperature, rollout_steps, make_gif, gif_stride, gif_fps, gif_max_frames):
880
  """
881
- Train byte-level transformer on bytes extracted from the trading card PNG.
882
  Training uses ONLY the PNG pixels.
883
  """
884
  try:
885
- card_png_path = STATE.get("card_png_path")
886
- payload_len = STATE.get("card_payload_len")
887
- slab_top = STATE.get("card_slab_top")
888
- header = STATE.get("card_header")
889
 
890
- if not card_png_path or not os.path.exists(card_png_path) or payload_len is None or slab_top is None:
891
- return "No trading card found. Generate a card first.", None, None, None, None
892
 
893
  device = "cuda" if torch.cuda.is_available() else "cpu"
894
 
895
- # Re-load card and extract payload bytes (PNG pixels only)
896
- card_rgb = load_png(card_png_path)
897
- extracted = extract_payload_from_card(card_rgb, slab_top=slab_top, slab_w=card_rgb.shape[1], payload_len=int(payload_len))
898
 
899
- # Parse payload and verify again (still only from pixels)
900
- header2, code_bytes = unpack_trading_card_payload(extracted)
901
  ok_crc = (_crc32_u32(code_bytes) == int(header2["crc32"]))
902
  ok_sha = (_sha256_hex(code_bytes) == str(header2["sha256"]))
903
  verified = (ok_crc and ok_sha)
904
 
905
  # Auto-tune for speed + guarantee it runs
906
- L = len(extracted) # training bytes are the full payload
907
  user_block = int(block_size)
908
  user_bs = int(batch_size)
909
 
@@ -915,22 +1020,21 @@ def train_from_card(train_steps, batch_size, block_size, lr, log_every,
915
  tuned_bs = min(user_bs, max(8, n_windows // 4)) if n_windows > 0 else 1
916
  batch_size = int(max(1, tuned_bs))
917
 
918
- # Start context for sampling: from the payload bytes (not codes.bin)
919
- start = list(extracted[:block_size])
920
 
921
- out_dir = os.path.dirname(card_png_path)
922
 
923
- # BEFORE rollout (untrained)
924
  untrained = TinyByteTransformer(block_size=block_size).to(device)
925
  before_seq = sample_bytes(untrained, start=start, steps=int(rollout_steps), device=device, temperature=float(temperature))
926
  before_plot = os.path.join(out_dir, "rollout_before.png")
927
  plot_rollout_tracks(before_seq[-int(rollout_steps):], before_plot, title="BEFORE training (random)")
928
 
929
  # Train
930
- model, losses, ppls = train_on_card_png(
931
  card_png_path=card_png_path,
932
- payload_len=int(payload_len),
933
- slab_top=int(slab_top),
934
  steps=int(train_steps),
935
  batch_size=batch_size,
936
  block_size=block_size,
@@ -952,69 +1056,38 @@ def train_from_card(train_steps, batch_size, block_size, lr, log_every,
952
  compare_plot = os.path.join(out_dir, "rollout_compare.png")
953
  plot_before_after_tracks(before_seq[-int(rollout_steps):], after_seq[-int(rollout_steps):], compare_plot)
954
 
955
- # Optional GIF (cap frames)
956
- gif_path = None
957
- if bool(make_gif):
958
- gif_path = os.path.join(out_dir, "rollout.gif")
959
- # Make a lightweight GIF using the byte track plot frames (fast)
960
- # We'll render a few frames by progressively revealing the curve
961
- seq = after_seq[-int(rollout_steps):]
962
- stride = max(1, int(gif_stride))
963
- fps = max(6, int(gif_fps))
964
- max_frames = max(12, int(gif_max_frames))
965
-
966
- frames = []
967
- count = 0
968
- for t in range(10, len(seq), stride):
969
- fig = plt.figure(figsize=(7,3.6))
970
- plt.plot(seq[:t], linewidth=2)
971
- plt.ylim(-2, 260)
972
- plt.title("AFTER training — rollout from trading card pixels")
973
- plt.xlabel("Step"); plt.ylabel("Byte value")
974
- plt.tight_layout()
975
- buf = io.BytesIO()
976
- plt.savefig(buf, format="png", dpi=140)
977
- plt.close(fig)
978
- buf.seek(0)
979
- frames.append(imageio.imread(buf))
980
- count += 1
981
- if count >= max_frames:
982
- break
983
- imageio.mimsave(gif_path, frames, fps=fps)
984
-
985
  report = (
986
- f"## Training Complete (PNG-only)\n"
987
  f"- **Device:** `{device}`\n"
988
  f"- **Integrity (from pixels):** {'✅ Verified' if verified else '❌ Not verified'}\n"
989
- f"- **Payload bytes used for training:** **{L}**\n"
990
  f"- **Auto block_size:** **{block_size}** (requested {user_block})\n"
991
  f"- **Auto batch_size:** **{batch_size}** (requested {user_bs})\n"
992
  f"- **Steps:** **{int(train_steps)}** (logged every {int(log_every)})\n"
993
  f"- **Final logged loss:** **{losses[-1]:.4f}**\n"
994
  f"- **Final logged perplexity:** **{ppls[-1]:.2f}**\n"
995
- f"\n### What investors should notice\n"
996
- f"Perplexity falls while training from **a single trading card image**."
997
  )
998
 
999
  metrics = {"loss": losses, "ppl": ppls}
1000
- return report, train_plot, compare_plot, gif_path, json.dumps(metrics, indent=2)
1001
  except Exception as e:
1002
- return f"Training error: {e}\n\n{traceback.format_exc()}", None, None, None, None
1003
 
1004
  # -----------------------------
1005
  # Gradio UI
1006
  # -----------------------------
1007
  INTRO = """
1008
- # Trading Card Learning (Level 1)
1009
  **Pipeline:**
1010
  1) Compress dataset → **constellation/radial codes**
1011
- 2) Pack codes into a **single PNG trading card**
1012
- 3) Train a tiny model using **only the PNG pixels**
1013
-
1014
- This is the “data becomes a trading card” end goal.
1015
  """
1016
 
1017
- with gr.Blocks(title="Trading Card Learning (CHR)") as demo:
1018
  gr.Markdown(INTRO)
1019
 
1020
  with gr.Tab("1) Ingest"):
@@ -1028,71 +1101,89 @@ with gr.Blocks(title="Trading Card Learning (CHR)") as demo:
1028
  ingest_btn.click(ingest_file, inputs=[file_in, units_mode], outputs=[ingest_status])
1029
  demo_btn.click(load_demo, inputs=[units_mode], outputs=[ingest_status])
1030
 
1031
- with gr.Tab("2) Compress → Trading Card"):
1032
  with gr.Row():
1033
  K = gr.Slider(2, 48, value=16, step=1, label="K (constellations)")
1034
- iters = gr.Slider(5, 120, value=35, step=1, label="CHR iterations")
1035
  beta = gr.Slider(2, 30, value=16, step=1, label="beta (assignment sharpness)")
1036
  with gr.Row():
1037
  slab_bins = gr.Slider(3, 16, value=8, step=1, label="slab bins (entropy measure)")
1038
  tau = gr.Slider(1, 20, value=5, step=1, label="tau (slab softness)")
1039
  radial_bins = gr.Slider(8, 256, value=64, step=8, label="radial bins (compression alphabet)")
1040
  seed = gr.Slider(0, 9999, value=42, step=1, label="seed")
1041
- with gr.Row():
1042
- card_width = gr.Slider(128, 512, value=256, step=32, label="Card width (pixels)")
1043
- front_height = gr.Slider(96, 320, value=160, step=16, label="Front (holo) height (pixels)")
1044
- title_text = gr.Textbox(value="CHR Trading Card", label="Card title")
1045
 
1046
- compress_btn = gr.Button("Generate Trading Card PNG", variant="primary")
1047
  compress_report = gr.Markdown("")
1048
  with gr.Row():
1049
  ent_img = gr.Image(label="Entropy during compression", type="filepath")
1050
  map_img = gr.Image(label="Constellation map (PCA)", type="filepath")
1051
- with gr.Row():
1052
- card_img = gr.Image(label="Trading Card PNG (contains the data)", type="filepath")
1053
- card_tilt = gr.Image(label="Holo tilt (GIF)", type="filepath")
1054
  with gr.Row():
1055
  codes_bin = gr.File(label="codes.bin (audit only)")
1056
  codec_json = gr.File(label="codec.json (audit only)")
1057
- card_header = gr.Code(label="Trading card header (from pixels)", language="json")
 
1058
 
1059
  compress_btn.click(
1060
- compress_and_make_card,
1061
- inputs=[K, iters, beta, slab_bins, tau, seed, radial_bins, card_width, front_height, title_text],
1062
- outputs=[compress_report, ent_img, map_img, card_img, card_tilt, codes_bin, codec_json, card_header]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1063
  )
1064
 
1065
- with gr.Tab("3) Train from Trading Card"):
 
1066
  with gr.Row():
1067
- train_steps = gr.Slider(50, 2000, value=250, step=50, label="training steps (fast demo default)")
1068
  batch_size = gr.Slider(4, 128, value=32, step=4, label="batch size")
1069
  block_size = gr.Slider(32, 256, value=128, step=16, label="sequence length (bytes)")
1070
  with gr.Row():
1071
  lr = gr.Number(value=5e-4, label="learning rate")
1072
- log_every = gr.Slider(10, 200, value=25, step=5, label="log every (steps)")
1073
  temperature = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="rollout temperature")
1074
  rollout_steps = gr.Slider(40, 400, value=120, step=20, label="rollout steps (bytes)")
1075
- with gr.Row():
1076
- make_gif = gr.Checkbox(value=False, label="Generate rollout GIF (adds time)")
1077
- gif_stride = gr.Slider(1, 12, value=5, step=1, label="GIF stride (higher = faster)")
1078
- gif_fps = gr.Slider(6, 24, value=12, step=1, label="GIF FPS")
1079
- gif_max_frames = gr.Slider(12, 120, value=40, step=4, label="GIF max frames (cap)")
1080
 
1081
- train_btn = gr.Button("Train from PNG pixels + generate visuals", variant="primary")
1082
  train_report = gr.Markdown("")
1083
  with gr.Row():
1084
  train_img = gr.Image(label="Loss + perplexity", type="filepath")
1085
  compare_img = gr.Image(label="BEFORE vs AFTER rollout", type="filepath")
1086
- gif_out = gr.Image(label="Rollout GIF (optional)", type="filepath")
 
 
1087
  metrics_json = gr.Code(label="Metrics (JSON)", language="json")
1088
 
1089
  train_btn.click(
1090
- train_from_card,
1091
- inputs=[train_steps, batch_size, block_size, lr, log_every, temperature, rollout_steps,
1092
- make_gif, gif_stride, gif_fps, gif_max_frames],
1093
- outputs=[train_report, train_img, compare_img, gif_out, metrics_json]
1094
  )
1095
 
1096
  if __name__ == "__main__":
1097
- # Disable SSR for stability / fewer asyncio warnings in Spaces
1098
  demo.launch(ssr_mode=False)
 
1
  # app.py — CHR Compressed-Only Learning via Trading Card PNG (Investor Demo)
2
+ # End-goal (Level 1): dataset -> compressed codes -> payload -> embedded INSIDE a "real" trading card PNG
3
  # Training reads ONLY the trading card PNG pixels (no codes.bin used in training)
4
 
5
  import os
 
120
  Compression isn’t just saving space — it’s choosing what matters.
121
  A constellation is a pattern you can navigate.
122
  Entropy is a measure of surprise, and learning is surprise turning into structure.
 
123
  A system that learns from compressed data never needs the original.
124
  It doesn’t memorize pixels; it memorizes geometry.
125
  It doesn’t hoard text; it extracts signals.
126
  The question isn’t “Can it compress?” but “Can it learn after compressing?”
 
127
  Investors love seeing systems move.
128
  They love curves that fall.
129
  They love maps that cluster.
130
  They love a demo that feels alive.
 
131
  This demo builds a codec from your dataset,
132
  then trains a model exclusively on the codec’s trading card.
133
  No raw text is used during training.
134
  Only the trading card exists.
 
135
  We call the clusters constellations.
136
  We call the structure harvestable.
137
  We call the drop in entropy visible proof.
 
263
  return bin_path, meta_path
264
 
265
  # -----------------------------
266
+ # Trading Card payload (binary blob to hide)
267
  # -----------------------------
268
+ CARD_MAGIC = b"TCAR"
269
  CARD_VER = 1
270
 
271
  def _sha256_hex(b: bytes) -> str:
 
275
  return zlib.crc32(b) & 0xFFFFFFFF
276
 
277
  def pack_trading_card_payload(code_bytes: bytes, codec: Dict, title: str = "CHR Trading Card") -> bytes:
 
 
 
 
 
 
 
278
  header = {
279
  "title": title,
280
  "codec": {
 
294
  "sha256": _sha256_hex(code_bytes),
295
  }
296
  header_json = json.dumps(header, ensure_ascii=False).encode("utf-8")
 
297
  blob = bytearray()
298
  blob += CARD_MAGIC
299
  blob += struct.pack("<I", CARD_VER)
 
304
  return bytes(blob)
305
 
306
  def unpack_trading_card_payload(payload: bytes) -> Tuple[Dict, bytes]:
 
 
 
307
  if len(payload) < 16:
308
  raise ValueError("Card payload too small.")
309
  if payload[:4] != CARD_MAGIC:
 
323
  raise ValueError("Card payload truncated.")
324
  return header, code_bytes
325
 
326
+ # -----------------------------
327
+ # NEW: LSB Steganography (payload hidden in pixels)
328
+ # -----------------------------
329
+ STEGO_MAGIC = b"TCV1" # trading card vault v1
330
+
331
def pack_stego(payload: bytes) -> bytes:
    """Wrap *payload* in the stego container: magic(4) | len(u32 LE) | crc32(u32 LE) | sha256(32) | payload."""
    digest = hashlib.sha256(payload).digest()
    parts = [
        STEGO_MAGIC,
        struct.pack("<II", len(payload), _crc32_u32(payload)),
        digest,
        payload,
    ]
    return b"".join(parts)
336
+
337
def unpack_stego(packed: bytes) -> bytes:
    """
    Validate and strip the stego container produced by pack_stego.

    Layout: magic(4) | len(u32 LE) | crc32(u32 LE) | sha256(32) | payload.

    Returns the raw payload bytes.
    Raises ValueError if the magic, length, CRC32, or SHA-256 check fails.
    """
    if packed[:4] != STEGO_MAGIC:
        raise ValueError("Bad stego magic")
    # Fix: reject containers shorter than the fixed 44-byte header up front.
    # Previously a 4-43 byte input passed the magic check and then failed with
    # an opaque struct.error or a misleading SHA-mismatch error.
    if len(packed) < 44:
        raise ValueError("Truncated stego payload")
    n = struct.unpack("<I", packed[4:8])[0]
    crc = struct.unpack("<I", packed[8:12])[0]
    sha = packed[12:44]
    data = packed[44:44 + n]
    if len(data) != n:
        raise ValueError("Truncated stego payload")
    if _crc32_u32(data) != crc:
        raise ValueError("Stego CRC mismatch")
    if hashlib.sha256(data).digest() != sha:
        raise ValueError("Stego SHA mismatch")
    return data
351
+
352
def stego_capacity_bytes(H: int, W: int, bits_per_channel: int = 1) -> int:
    """Whole bytes embeddable in an H x W RGB carrier (budget includes the ~44-byte stego header)."""
    total_bits = H * W * 3 * bits_per_channel
    return total_bits // 8
355
+
356
def embed_lsb_rgb(carrier_rgb: np.ndarray, payload_bytes: bytes, bits_per_channel: int = 1) -> np.ndarray:
    """
    Embed payload into carrier RGB using LSBs.

    The payload is first wrapped by pack_stego (magic + length + CRC32 + SHA-256),
    then written MSB-first into the lowest ``bits_per_channel`` bits of each
    channel byte, scanning the carrier in flattened row-major order.

    Returns a modified copy of the carrier; the input array is not touched.
    Raises RuntimeError when the carrier has too few pixels for the payload.
    NOTE(review): uses ``assert`` for input validation — stripped under ``python -O``;
    consider raising ValueError instead.
    """
    assert carrier_rgb.dtype == np.uint8 and carrier_rgb.ndim == 3 and carrier_rgb.shape[2] == 3
    assert bits_per_channel in (1, 2), "Use 1 for best invisibility"

    packed = pack_stego(payload_bytes)
    # unpackbits yields one uint8 per bit, MSB-first within each source byte.
    bitstream = np.unpackbits(np.frombuffer(packed, dtype=np.uint8))

    H, W, C = carrier_rgb.shape
    capacity_bits = H * W * C * bits_per_channel
    if len(bitstream) > capacity_bits:
        raise RuntimeError(
            f"Carrier too small: need {len(bitstream)} bits, capacity {capacity_bits} bits. "
            f"Use larger image or bits_per_channel=2."
        )

    out = carrier_rgb.copy()
    flat = out.reshape(-1)  # view into `out`: writes below mutate the copy in place

    k = bits_per_channel
    mask_clear = 0xFF ^ ((1 << k) - 1)  # clears the k low bits of each channel byte

    if k == 1:
        # One payload bit per channel byte.
        flat[:len(bitstream)] = (flat[:len(bitstream)] & mask_clear) | bitstream
    else:
        # Two bits per channel byte: pad to an even bit count, then pack pairs
        # into 2-bit symbols (first bit of the pair is the high bit).
        pad = (-len(bitstream)) % 2
        if pad:
            bitstream = np.concatenate([bitstream, np.zeros(pad, dtype=np.uint8)])
        symbols = bitstream.reshape(-1, 2)
        vals = (symbols[:, 0] << 1) | symbols[:, 1]
        flat[:len(vals)] = (flat[:len(vals)] & mask_clear) | vals

    return out
392
+
393
def extract_lsb_rgb(stego_rgb: np.ndarray, bits_per_channel: int = 1) -> bytes:
    """
    Extract embedded payload bytes from RGB using LSBs.

    Mirror of embed_lsb_rgb: reads the lowest ``bits_per_channel`` bits of each
    channel byte in flattened order, reconstructs the 44-byte stego header to
    learn the payload length, then re-reads header+payload and validates via
    unpack_stego (magic, CRC32, SHA-256).

    Raises ValueError when no valid stego header is found, or on integrity failure.
    """
    assert stego_rgb.dtype == np.uint8 and stego_rgb.ndim == 3 and stego_rgb.shape[2] == 3
    k = bits_per_channel
    flat = stego_rgb.reshape(-1)

    # Need at least 44-byte stego header first (magic 4 + len 4 + crc 4 + sha 32).
    header_bytes = 44
    header_bits = header_bytes * 8

    def read_bits(nbits: int) -> np.ndarray:
        # Returns the first `nbits` embedded bits as a uint8 0/1 array.
        if k == 1:
            vals = (flat[:nbits] & 1).astype(np.uint8)
            return vals
        else:
            # 2-bit symbols: high bit first, matching the embed-side packing.
            nvals = (nbits + 1) // 2
            vals = (flat[:nvals] & 3).astype(np.uint8)
            bits = np.zeros(nvals * 2, dtype=np.uint8)
            bits[0::2] = (vals >> 1) & 1
            bits[1::2] = vals & 1
            return bits[:nbits]

    hb = read_bits(header_bits)
    header = np.packbits(hb).tobytes()

    if header[:4] != STEGO_MAGIC:
        raise ValueError("Stego magic not found (wrong image or wrong bits_per_channel).")

    # Payload length lives at header bytes 4-7 (u32 little-endian).
    n = struct.unpack("<I", header[4:8])[0]
    total_bytes = 44 + n
    total_bits = total_bytes * 8

    # Re-read header + payload in one pass and let unpack_stego verify integrity.
    bits = read_bits(total_bits)
    packed = np.packbits(bits).tobytes()
    return unpack_stego(packed)
430
+
431
+ # -----------------------------
432
+ # Visual "hologram" front (sizzle only)
433
+ # -----------------------------
434
def make_holo_front(U: np.ndarray, K: int, W: int, H: int, seed: int = 0) -> np.ndarray:
    """
    Render a decorative W x H "hologram" RGB panel from constellation centers.

    Purely visual sizzle: sums a few cosine interference patterns whose
    directions are taken from entries of U, normalizes each channel, then
    applies a radial vignette. Deterministic for a given (U, K, W, H, seed).

    Fix: restores the inner `for i in range(n)` loop over the selected
    centers — the loop body (`V[i]`, `freqs[i]`) referenced `i`, which was
    otherwise undefined.
    """
    rng = np.random.RandomState(int(seed) + 123)

    d = U.shape[1]
    n = min(K, 16)  # cap the number of interference sources
    idx = rng.choice(K, size=n, replace=K < n)
    V = U[idx]

    # Normalized coordinate grids in [-1, 1]
    yy, xx = np.mgrid[0:H, 0:W].astype(np.float32)
    xx = (xx / max(1, W - 1) - 0.5) * 2.0
    yy = (yy / max(1, H - 1) - 0.5) * 2.0

    freqs = rng.uniform(2.0, 10.0, size=n).astype(np.float32)
    phases = rng.uniform(0, 2 * np.pi, size=n).astype(np.float32)

    out = np.zeros((H, W, 3), dtype=np.float32)
    for c in range(3):
        acc = np.zeros((H, W), dtype=np.float32)
        for i in range(n):
            # Direction of each wave is driven by two entries of the center.
            a = float(V[i, (c * 7) % d])
            b = float(V[i, (c * 11 + 3) % d])
            acc += np.cos(freqs[i] * (a * xx + b * yy) + phases[i])
        # Per-channel min-max normalization (epsilon guards flat fields)
        acc = (acc - acc.min()) / (acc.max() - acc.min() + 1e-9)
        out[..., c] = acc

    # Radial vignette for depth
    rr = np.sqrt(xx * xx + yy * yy)
    vignette = np.clip(1.1 - 0.35 * rr, 0.6, 1.1)
    out *= vignette[..., None]
    out = np.clip(out, 0.0, 1.0)
    return (out * 255.0).astype(np.uint8)
461
 
462
def overlay_holo_banner(base_rgb: np.ndarray, holo_rgb: np.ndarray, alpha: float = 0.35) -> np.ndarray:
    """
    Alpha-blend a hologram panel onto the top-left region of a card image.

    Decorative only — the overlay content is not needed to decode anything.
    NOTE(review): blending rewrites pixel values (including LSBs) in the
    covered region, so apply this BEFORE embedding any LSB payload.
    """
    blended = base_rgb.astype(np.float32)  # astype copies; original untouched
    H, W, _ = blended.shape
    rows = min(holo_rgb.shape[0], H)
    cols = min(holo_rgb.shape[1], W)
    patch = holo_rgb[:rows, :cols].astype(np.float32)
    blended[:rows, :cols] = blended[:rows, :cols] * (1 - alpha) + patch * alpha
    return np.clip(blended, 0, 255).astype(np.uint8)
474
+
475
def add_text_frame(card_rgb: np.ndarray, title: str, subtitle: str) -> np.ndarray:
    """
    Draw a title bar, subtitle line, and outer frame on the card (sizzle).

    Best-effort: if Pillow drawing fails for any reason, the card is
    returned unchanged. NOTE(review): drawing rewrites pixels, so this
    should run BEFORE any LSB payload is embedded.
    """
    try:
        canvas = Image.fromarray(card_rgb, mode="RGB")
        from PIL import ImageDraw, ImageFont
        painter = ImageDraw.Draw(canvas)
        font = ImageFont.load_default()
        width, height = canvas.size

        # Black header bar with truncated title, subtitle just below it
        painter.rectangle([0, 0, width, 26], fill=(0, 0, 0))
        painter.text((10, 6), title[:80], fill=(255, 255, 255), font=font)
        painter.text((10, 34), subtitle[:120], fill=(255, 255, 255), font=font)

        # Thin light border frame
        painter.rectangle([4, 4, width - 5, height - 5], outline=(220, 220, 255), width=2)

        return np.array(canvas, dtype=np.uint8)
    except Exception:
        # Deliberate best-effort fallback: never fail the pipeline on cosmetics.
        return card_rgb
 
 
494
 
495
def save_png(arr: np.ndarray, path: str):
    """Write an RGB array to *path* as PNG (lossless, so LSB data survives)."""
    img = Image.fromarray(arr)
    img.save(path, format="PNG")
497
 
498
def load_image_any(path: str) -> np.ndarray:
    """
    Load any PNG/JPG (e.g. a carrier image) as an RGB uint8 array.

    Fix: open the file with a context manager so the underlying handle is
    closed deterministically — Image.open is lazy and the original left
    the file handle to be closed only by garbage collection.
    """
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)
501
 
 
 
 
 
 
 
 
 
 
 
 
502
  # -----------------------------
503
+ # Visuals
504
  # -----------------------------
505
  def plot_entropy(Hg, Hs, out_path):
506
  plt.figure(figsize=(6,4))
 
570
  plt.close()
571
 
572
  def make_card_tilt_gif(card_rgb: np.ndarray, out_path: str, frames: int = 24, fps: int = 12):
 
 
 
 
573
  H, W, _ = card_rgb.shape
574
  frames = int(max(8, min(frames, 48)))
575
  fps = int(max(6, min(fps, 24)))
 
581
  dy = int(2 + 3*np.cos(a))
582
 
583
  img = card_rgb.copy().astype(np.int16)
584
+ front_h = int(H * 0.35)
 
 
585
  yy, xx = np.mgrid[0:front_h, 0:W]
586
  grad = (0.85 + 0.15*np.sin(a + (xx / max(1, W-1))*2*np.pi)).astype(np.float32)
587
 
 
588
  r = np.roll(img[:front_h, :, 0], shift=dx, axis=1)
589
  g = np.roll(img[:front_h, :, 1], shift=dy, axis=0)
590
  b = img[:front_h, :, 2]
 
599
  imageio.mimsave(out_path, imgs, fps=fps)
600
 
601
  # -----------------------------
602
+ # Training: byte-model reads ONLY the trading card PNG pixels (LSB extraction)
603
  # -----------------------------
604
  import torch
605
  import torch.nn as nn
606
  from torch.utils.data import Dataset, DataLoader
607
 
608
+ class CardStegoDataset(Dataset):
609
  """
610
+ Produces next-byte prediction windows from bytes extracted from the card PNG via LSB.
 
611
  """
612
+ def __init__(self, card_png_path: str, bits_per_channel: int = 1, block_size: int = 128):
613
  self.card_png_path = card_png_path
614
+ self.bits_per_channel = int(bits_per_channel)
 
615
  self.block_size = int(block_size)
616
 
617
+ card = load_image_any(card_png_path)
618
+ payload = extract_lsb_rgb(card, bits_per_channel=self.bits_per_channel)
619
+ # We train on the whole payload blob (contains card header+codes)
620
+ self.bytes = torch.tensor(list(payload), dtype=torch.long)
 
621
 
622
  def __len__(self):
623
  return max(0, len(self.bytes) - self.block_size - 1)
 
664
  seq.append(nxt)
665
  return seq
666
 
667
+ def train_on_stego_card(card_png_path: str,
668
+ bits_per_channel: int = 1,
669
+ steps: int = 180,
670
+ batch_size: int = 32,
671
+ block_size: int = 128,
672
+ lr: float = 5e-4,
673
+ device: str = "cpu",
674
+ log_every: int = 20):
675
+ ds = CardStegoDataset(card_png_path, bits_per_channel=bits_per_channel, block_size=block_size)
 
676
  n_windows = len(ds)
677
  if n_windows <= 0:
678
+ raise RuntimeError(f"Not enough embedded bytes for block_size={block_size}. Reduce block_size or embed more.")
679
 
 
680
  drop_last = n_windows >= batch_size
681
  dl = DataLoader(ds, batch_size=batch_size, shuffle=True, drop_last=drop_last)
682
  it = iter(dl)
 
727
  "codec_path": None,
728
  "codec": None,
729
 
730
+ "payload_bytes": None, # the true payload to embed (TCAR blob)
731
+ "payload_len": None,
732
+
733
+ "plain_card_png": None, # original "CHR slab card" (optional preview)
734
+ "plain_card_gif": None,
735
+
736
+ "final_card_png": None, # carrier-with-embedded-payload (THIS is what we train on)
737
+ "final_card_gif": None,
738
+ "final_card_header": None, # header decoded from embedded payload
739
+ "stego_bits": 1,
740
 
741
  "model": None,
742
  }
 
782
  except Exception as e:
783
  return f"Error ingesting file: {e}"
784
 
785
+ def compress_and_make_payload(K, iters, beta, slab_bins, tau, seed, radial_bins, title_text):
 
786
  """
787
+ Compress, create payload bytes (TCAR), and also render a quick "preview" CHR card (slab style).
788
+ NOTE: The preview is not used for training. Training will use the final embedded carrier card.
 
 
789
  """
790
  try:
791
  units = STATE.get("units")
792
  if not units:
793
  return "No units loaded. Upload or load demo corpus.", None, None, None, None, None, None
794
 
 
795
  Z, backend = embed_texts(units, prefer_sentence_transformer=True)
796
  U, p, Hg, Hs = chr_optimize(
797
  Z, K=int(K), iters=int(iters), beta=float(beta),
 
803
 
804
  edges = make_radial_bins(radials, B=int(radial_bins))
805
  bins_q = np.array([quantize_radial(float(radials[i]), edges) for i in range(len(units))], dtype=np.int32)
 
806
  code_bytes = pack_codes_to_bytes(labels, bins_q)
807
 
 
808
  out_dir = tempfile.mkdtemp()
809
+
810
  codec = {
811
  "backend": backend,
812
  "K": int(K),
 
824
  }
825
  bin_path, codec_path = save_codes_and_codec(code_bytes, codec, out_dir)
826
 
827
+ payload = pack_trading_card_payload(
828
+ code_bytes=code_bytes,
829
+ codec=codec,
830
+ title=str(title_text).strip()[:120] or "CHR Trading Card"
831
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
832
 
833
  STATE.update({
834
  "Z": Z, "U": U, "labels": labels, "bins": bins_q,
835
  "bin_path": bin_path, "codec_path": codec_path, "codec": codec,
836
+ "payload_bytes": payload,
837
+ "payload_len": int(len(payload)),
838
+ "final_card_png": None,
839
+ "final_card_gif": None,
840
+ "final_card_header": None,
841
  "model": None
842
  })
843
 
844
+ # Visuals
845
+ ent_plot = os.path.join(out_dir, "entropy.png")
846
+ map_plot = os.path.join(out_dir, "map.png")
847
+ plot_entropy(Hg, Hs, ent_plot)
848
+ plot_constellation_map(Z, U, labels, map_plot)
849
+
850
+ mhep = compute_mhep(Hg, Hs, K=int(K), bins=int(slab_bins))
851
  report = (
852
+ f"## Payload Ready\n"
853
  f"- **Embedding backend:** `{backend}`\n"
854
  f"- **Units:** **{len(units)}**\n"
855
  f"- **Constellations (K):** **{int(K)}**\n"
856
  f"- **Radial bins:** **{int(radial_bins)}**\n"
857
+ f"- **Code bytes:** **{len(code_bytes)}**\n"
858
+ f"- **Payload bytes (TCAR):** **{len(payload)}**\n"
 
859
  f"- **MHEP score:** **{mhep:.1f}%**\n"
860
+ f"\nNext: upload a **carrier trading card image** and embed this payload inside it."
 
 
 
 
 
861
  )
862
+ return report, ent_plot, map_plot, bin_path, codec_path, json.dumps({"payload_len": len(payload)}, indent=2), json.dumps(codec, indent=2)
863
+ except Exception as e:
864
+ return f"Error: {e}\n\n{traceback.format_exc()}", None, None, None, None, None, None
865
+
866
def embed_payload_into_carrier(carrier_file, card_width, holo_height, holo_alpha, stego_bits, title_text):
    """
    Build the final trading card:
      1) Load the uploaded carrier image and crop/resize it to a card canvas.
      2) Apply the holo banner and title frame (visual decoration only).
      3) Embed the TCAR payload into the DECORATED image via LSB stego.
      4) Save as PNG and verify by re-extracting the payload from pixels.

    BUGFIX: decoration now happens BEFORE embedding. The previous order
    (embed first, then alpha-blend the holo banner and draw the title
    frame) rewrote pixel values in the top rows — exactly where the LSB
    bitstream begins — corrupting the hidden stego header/payload so the
    in-function verification could not succeed.

    Returns (report_md, final_png_path, tilt_gif_path, header_json, capacity_json);
    on failure the last four are None.
    """
    try:
        payload = STATE.get("payload_bytes")
        codec = STATE.get("codec")
        U = STATE.get("U")
        if payload is None or codec is None or U is None:
            return "No payload exists yet. Run compression first.", None, None, None, None

        b, name = _bytes_from_upload(carrier_file)
        if not b:
            return "Upload a carrier image (PNG/JPG).", None, None, None, None

        out_dir = tempfile.mkdtemp()
        carrier_path = os.path.join(out_dir, f"carrier_{name}")
        Path(carrier_path).write_bytes(b)

        # Load carrier as RGB
        rgb = load_image_any(carrier_path)

        # Build a consistent trading-card-ish canvas (ratio ~1:1.4)
        W = int(card_width)
        H = int(max(256, round(W * 1.4)))
        # Center-crop to the target ratio, then resize
        pil = Image.fromarray(rgb, mode="RGB")
        srcW, srcH = pil.size
        target_ratio = W / H
        src_ratio = srcW / srcH
        if src_ratio > target_ratio:
            # too wide, crop width
            newW = int(srcH * target_ratio)
            x0 = (srcW - newW) // 2
            pil = pil.crop((x0, 0, x0 + newW, srcH))
        else:
            # too tall, crop height
            newH = int(srcW / target_ratio)
            y0 = (srcH - newH) // 2
            pil = pil.crop((0, y0, srcW, y0 + newH))
        pil = pil.resize((W, H), resample=Image.BICUBIC)
        base = np.array(pil, dtype=np.uint8)

        bits = int(stego_bits)
        if bits not in (1, 2):
            bits = 1

        # Capacity check (packed payload includes the ~44-byte stego header)
        cap = stego_capacity_bytes(H, W, bits_per_channel=bits)
        packed_need = len(pack_stego(payload))
        if packed_need > cap:
            return (
                f"Carrier too small for payload.\n\n"
                f"- Carrier canvas: **{W}x{H}**\n"
                f"- Capacity (@{bits} bit/channel): **~{cap} bytes**\n"
                f"- Needed (payload+stego header): **{packed_need} bytes**\n\n"
                f"Fix: increase card_width, use a larger carrier, or set bits_per_channel=2.",
                None, None, None, None
            )

        # --- Decorate FIRST (these steps rewrite pixels) ---
        hh = max(64, min(int(holo_height), H))
        alpha = max(0.0, min(float(holo_alpha), 0.85))
        holo = make_holo_front(np.array(U, dtype=np.float32), K=int(codec["K"]), W=W, H=hh, seed=int(codec["seed"]))
        decorated = overlay_holo_banner(base, holo, alpha=alpha)

        subtitle = f"Units={codec.get('units_count')} K={codec.get('K')} Payload={len(payload)}B CRC32={_crc32_u32(payload):08x}"
        decorated = add_text_frame(decorated, title=str(title_text).strip() or "Trading Card", subtitle=subtitle)

        # --- Embed LAST so no later pixel edit can clobber the LSB bitstream ---
        final = embed_lsb_rgb(decorated, payload_bytes=payload, bits_per_channel=bits)

        # Save final PNG (IMPORTANT: PNG only — lossy formats destroy LSBs)
        final_path = os.path.join(out_dir, "final_trading_card.png")
        save_png(final, final_path)

        # Verify by extraction (pixels only)
        extracted_payload = extract_lsb_rgb(load_image_any(final_path), bits_per_channel=bits)
        header2, code2 = unpack_trading_card_payload(extracted_payload)

        ok_codes_crc = (_crc32_u32(code2) == int(header2["crc32"]))
        ok_codes_sha = (_sha256_hex(code2) == str(header2["sha256"]))
        verified = bool(ok_codes_crc and ok_codes_sha)

        # Tilt gif for wow (display only; the PNG is the source of truth)
        tilt_path = os.path.join(out_dir, "final_card_tilt.gif")
        make_card_tilt_gif(final, tilt_path, frames=18, fps=12)

        STATE.update({
            "final_card_png": final_path,
            "final_card_gif": tilt_path,
            "final_card_header": header2,
            "stego_bits": bits,
            "model": None
        })

        report = (
            f"## Final Trading Card Created (Carrier + Embedded Data)\n"
            f"- **Carrier canvas:** **{W}x{H}**\n"
            f"- **Stego bits/channel:** **{bits}**\n"
            f"- **Capacity:** ~**{cap} bytes**\n"
            f"- **Needed:** **{packed_need} bytes**\n"
            f"- **Verified (from pixels):** {'✅ YES' if verified else '❌ NO'}\n"
            f"\nThis PNG now **contains the real dataset representation inside its pixels**.\n"
            f"Next: Train — model will extract bytes from **this image only**."
        )
        return report, final_path, tilt_path, json.dumps(header2, indent=2), json.dumps({"capacity_bytes": cap, "needed_bytes": packed_need}, indent=2)
    except Exception as e:
        return f"Embed error: {e}\n\n{traceback.format_exc()}", None, None, None, None
984
 
985
+ def train_from_final_card(train_steps, batch_size, block_size, lr, log_every,
986
+ temperature, rollout_steps):
987
  """
988
+ Train byte-level transformer on bytes extracted from FINAL trading card PNG via LSB.
989
  Training uses ONLY the PNG pixels.
990
  """
991
  try:
992
+ card_png_path = STATE.get("final_card_png")
993
+ bits = int(STATE.get("stego_bits", 1))
 
 
994
 
995
+ if not card_png_path or not os.path.exists(card_png_path):
996
+ return "No final trading card found. Embed payload onto a carrier card first.", None, None, None, None
997
 
998
  device = "cuda" if torch.cuda.is_available() else "cpu"
999
 
1000
+ # Extract payload from pixels
1001
+ card_rgb = load_image_any(card_png_path)
1002
+ payload = extract_lsb_rgb(card_rgb, bits_per_channel=bits)
1003
 
1004
+ # Verify and parse
1005
+ header2, code_bytes = unpack_trading_card_payload(payload)
1006
  ok_crc = (_crc32_u32(code_bytes) == int(header2["crc32"]))
1007
  ok_sha = (_sha256_hex(code_bytes) == str(header2["sha256"]))
1008
  verified = (ok_crc and ok_sha)
1009
 
1010
  # Auto-tune for speed + guarantee it runs
1011
+ L = len(payload)
1012
  user_block = int(block_size)
1013
  user_bs = int(batch_size)
1014
 
 
1020
  tuned_bs = min(user_bs, max(8, n_windows // 4)) if n_windows > 0 else 1
1021
  batch_size = int(max(1, tuned_bs))
1022
 
1023
+ # Start context for sampling: from the payload bytes
1024
+ start = list(payload[:block_size])
1025
 
1026
+ out_dir = tempfile.mkdtemp()
1027
 
1028
+ # BEFORE rollout
1029
  untrained = TinyByteTransformer(block_size=block_size).to(device)
1030
  before_seq = sample_bytes(untrained, start=start, steps=int(rollout_steps), device=device, temperature=float(temperature))
1031
  before_plot = os.path.join(out_dir, "rollout_before.png")
1032
  plot_rollout_tracks(before_seq[-int(rollout_steps):], before_plot, title="BEFORE training (random)")
1033
 
1034
  # Train
1035
+ model, losses, ppls = train_on_stego_card(
1036
  card_png_path=card_png_path,
1037
+ bits_per_channel=bits,
 
1038
  steps=int(train_steps),
1039
  batch_size=batch_size,
1040
  block_size=block_size,
 
1056
  compare_plot = os.path.join(out_dir, "rollout_compare.png")
1057
  plot_before_after_tracks(before_seq[-int(rollout_steps):], after_seq[-int(rollout_steps):], compare_plot)
1058
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1059
  report = (
1060
+ f"## Training Complete (Card Pixels Only)\n"
1061
  f"- **Device:** `{device}`\n"
1062
  f"- **Integrity (from pixels):** {'✅ Verified' if verified else '❌ Not verified'}\n"
1063
+ f"- **Payload bytes trained on:** **{L}**\n"
1064
  f"- **Auto block_size:** **{block_size}** (requested {user_block})\n"
1065
  f"- **Auto batch_size:** **{batch_size}** (requested {user_bs})\n"
1066
  f"- **Steps:** **{int(train_steps)}** (logged every {int(log_every)})\n"
1067
  f"- **Final logged loss:** **{losses[-1]:.4f}**\n"
1068
  f"- **Final logged perplexity:** **{ppls[-1]:.2f}**\n"
1069
+ f"\n### Investor-proof statement\n"
1070
+ f"The model learned from **a single trading card image** that contains the dataset **inside its pixels**."
1071
  )
1072
 
1073
  metrics = {"loss": losses, "ppl": ppls}
1074
+ return report, train_plot, compare_plot, before_plot, after_plot, json.dumps(metrics, indent=2)
1075
  except Exception as e:
1076
+ return f"Training error: {e}\n\n{traceback.format_exc()}", None, None, None, None, None
1077
 
1078
  # -----------------------------
1079
  # Gradio UI
1080
  # -----------------------------
1081
  INTRO = """
1082
+ # Trading Card Learning (Level 1 — Investor Demo)
1083
  **Pipeline:**
1084
  1) Compress dataset → **constellation/radial codes**
1085
+ 2) Pack codes into a payload (TCAR)
1086
+ 3) Upload a **real trading card image** (carrier), then embed the payload **inside its pixels** (LSB stego)
1087
+ 4) Train a tiny model using **only the final card image pixels**
 
1088
  """
1089
 
1090
+ with gr.Blocks(title="Trading Card Learning (CHR + Stego)") as demo:
1091
  gr.Markdown(INTRO)
1092
 
1093
  with gr.Tab("1) Ingest"):
 
1101
  ingest_btn.click(ingest_file, inputs=[file_in, units_mode], outputs=[ingest_status])
1102
  demo_btn.click(load_demo, inputs=[units_mode], outputs=[ingest_status])
1103
 
1104
+ with gr.Tab("2) Compress → Payload"):
1105
  with gr.Row():
1106
  K = gr.Slider(2, 48, value=16, step=1, label="K (constellations)")
1107
+ iters = gr.Slider(5, 120, value=30, step=1, label="CHR iterations")
1108
  beta = gr.Slider(2, 30, value=16, step=1, label="beta (assignment sharpness)")
1109
  with gr.Row():
1110
  slab_bins = gr.Slider(3, 16, value=8, step=1, label="slab bins (entropy measure)")
1111
  tau = gr.Slider(1, 20, value=5, step=1, label="tau (slab softness)")
1112
  radial_bins = gr.Slider(8, 256, value=64, step=8, label="radial bins (compression alphabet)")
1113
  seed = gr.Slider(0, 9999, value=42, step=1, label="seed")
1114
+ title_text = gr.Textbox(value="CHR Trading Card", label="Card title")
1115
+ compress_btn = gr.Button("Create payload (ready to embed)", variant="primary")
 
 
1116
 
 
1117
  compress_report = gr.Markdown("")
1118
  with gr.Row():
1119
  ent_img = gr.Image(label="Entropy during compression", type="filepath")
1120
  map_img = gr.Image(label="Constellation map (PCA)", type="filepath")
 
 
 
1121
  with gr.Row():
1122
  codes_bin = gr.File(label="codes.bin (audit only)")
1123
  codec_json = gr.File(label="codec.json (audit only)")
1124
+ payload_info = gr.Code(label="Payload info", language="json")
1125
+ codec_dump = gr.Code(label="Codec (debug)", language="json")
1126
 
1127
  compress_btn.click(
1128
+ compress_and_make_payload,
1129
+ inputs=[K, iters, beta, slab_bins, tau, seed, radial_bins, title_text],
1130
+ outputs=[compress_report, ent_img, map_img, codes_bin, codec_json, payload_info, codec_dump]
1131
+ )
1132
+
1133
+ with gr.Tab("3) Embed on Carrier Trading Card"):
1134
+ gr.Markdown(
1135
+ "Upload a **carrier trading card image** (PNG/JPG). "
1136
+ "We will embed the payload **inside the image pixels** (LSB stego), then add a holo banner for wow."
1137
+ )
1138
+ with gr.Row():
1139
+ carrier_img = gr.File(label="Carrier trading card image (PNG/JPG)", file_types=[".png", ".jpg", ".jpeg"])
1140
+ card_width = gr.Slider(256, 1024, value=512, step=64, label="Card width (bigger = more capacity)")
1141
+ with gr.Row():
1142
+ holo_height = gr.Slider(64, 420, value=180, step=10, label="Hologram banner height")
1143
+ holo_alpha = gr.Slider(0.0, 0.85, value=0.35, step=0.05, label="Holo overlay strength")
1144
+ stego_bits = gr.Radio([1, 2], value=1, label="Stego bits/channel (1=stealth, 2=more capacity)")
1145
+ embed_btn = gr.Button("Embed payload into carrier + generate final PNG", variant="primary")
1146
+
1147
+ embed_report = gr.Markdown("")
1148
+ with gr.Row():
1149
+ final_card = gr.Image(label="Final Trading Card (PNG — contains the data inside pixels)", type="filepath")
1150
+ final_gif = gr.Image(label="Tilt GIF (wow)", type="filepath")
1151
+ header_json = gr.Code(label="Decoded header (from pixels)", language="json")
1152
+ cap_json = gr.Code(label="Capacity check", language="json")
1153
+
1154
+ embed_btn.click(
1155
+ embed_payload_into_carrier,
1156
+ inputs=[carrier_img, card_width, holo_height, holo_alpha, stego_bits, title_text],
1157
+ outputs=[embed_report, final_card, final_gif, header_json, cap_json]
1158
  )
1159
 
1160
+ with gr.Tab("4) Train from Final Trading Card"):
1161
+ gr.Markdown("Training extracts bytes from the **final card image pixels** (LSB) and learns on those bytes.")
1162
  with gr.Row():
1163
+ train_steps = gr.Slider(50, 1200, value=180, step=30, label="training steps (fast demo default)")
1164
  batch_size = gr.Slider(4, 128, value=32, step=4, label="batch size")
1165
  block_size = gr.Slider(32, 256, value=128, step=16, label="sequence length (bytes)")
1166
  with gr.Row():
1167
  lr = gr.Number(value=5e-4, label="learning rate")
1168
+ log_every = gr.Slider(10, 200, value=20, step=5, label="log every (steps)")
1169
  temperature = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="rollout temperature")
1170
  rollout_steps = gr.Slider(40, 400, value=120, step=20, label="rollout steps (bytes)")
 
 
 
 
 
1171
 
1172
+ train_btn = gr.Button("Train from final card pixels + generate visuals", variant="primary")
1173
  train_report = gr.Markdown("")
1174
  with gr.Row():
1175
  train_img = gr.Image(label="Loss + perplexity", type="filepath")
1176
  compare_img = gr.Image(label="BEFORE vs AFTER rollout", type="filepath")
1177
+ with gr.Row():
1178
+ before_img = gr.Image(label="BEFORE rollout track", type="filepath")
1179
+ after_img = gr.Image(label="AFTER rollout track", type="filepath")
1180
  metrics_json = gr.Code(label="Metrics (JSON)", language="json")
1181
 
1182
  train_btn.click(
1183
+ train_from_final_card,
1184
+ inputs=[train_steps, batch_size, block_size, lr, log_every, temperature, rollout_steps],
1185
+ outputs=[train_report, train_img, compare_img, before_img, after_img, metrics_json]
 
1186
  )
1187
 
1188
  if __name__ == "__main__":
 
1189
  demo.launch(ssr_mode=False)