TuringsSolutions committed on
Commit
629138b
·
verified ·
1 Parent(s): 8d97eaa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1098 -0
app.py ADDED
@@ -0,0 +1,1098 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — CHR Compressed-Only Learning via Trading Card PNG (Investor Demo)
2
+ # End-goal (Level 1): dataset -> compressed codes -> 2D "trading card" image
3
+ # Training reads ONLY the trading card PNG pixels (no codes.bin used in training)
4
+
5
+ import os
6
+ # --- HF Spaces hardening: make OpenMP thread env vars valid integers ---
7
def _set_int_env(name: str, value: int):
    """Ensure environment variable *name* holds a plain decimal integer.

    If the variable is unset, empty, or contains anything other than ASCII
    digits, it is overwritten with ``str(value)``. An existing all-digit
    value is left untouched.
    """
    current = str(os.environ.get(name, ""))
    # str.isdigit() alone also accepts non-ASCII digits such as "²", which
    # are not valid integer settings, so require ASCII as well.
    if not (current.isascii() and current.isdigit()):
        os.environ[name] = str(value)


_set_int_env("OMP_NUM_THREADS", 1)
_set_int_env("OPENBLAS_NUM_THREADS", 1)
_set_int_env("MKL_NUM_THREADS", 1)
_set_int_env("NUMEXPR_NUM_THREADS", 1)
16
+
17
+ import io, re, json, math, struct, tempfile, traceback, hashlib, zlib
18
+ from pathlib import Path
19
+ from typing import List, Tuple, Dict, Optional
20
+
21
+ import numpy as np
22
+ import gradio as gr
23
+
24
+ import matplotlib
25
+ matplotlib.use("Agg")
26
+ import matplotlib.pyplot as plt
27
+
28
+ import imageio.v2 as imageio
29
+ from PIL import Image
30
+
31
+ # -----------------------------
32
+ # Optional DOCX support
33
+ # -----------------------------
34
+ _DOCX_OK = False
35
+ try:
36
+ from docx import Document
37
+ _DOCX_OK = True
38
+ except Exception:
39
+ _DOCX_OK = False
40
+
41
+ # -----------------------------
42
+ # Embeddings: sentence-transformers (preferred), fallback to hashing
43
+ # -----------------------------
44
+ from sklearn.feature_extraction.text import HashingVectorizer
45
+ from sklearn.decomposition import PCA
46
+
47
_ST_MODEL = None


def _load_st_model():
    """Return the cached SentenceTransformer model, loading it on first use.

    Returns ``None`` when the import or the model construction fails, so the
    caller can fall back to another embedding backend. A failure is not
    cached: the load is attempted again on the next call.
    """
    global _ST_MODEL
    if _ST_MODEL is not None:
        return _ST_MODEL
    try:
        from sentence_transformers import SentenceTransformer
        _ST_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    except Exception:
        # Deliberate best-effort: any failure means "backend unavailable".
        return None
    return _ST_MODEL
58
+
59
def embed_texts(texts: List[str], prefer_sentence_transformer: bool = True) -> Tuple[np.ndarray, str]:
    """Embed *texts* and report which backend produced the vectors.

    Non-string items are converted with ``str``. When
    *prefer_sentence_transformer* is true and the model loads and encodes
    successfully, its normalized float32 embeddings are returned. Otherwise
    the texts are hashed with a 768-feature ``HashingVectorizer`` and each
    row is L2-normalized.

    Returns:
        ``(vectors, backend_name)``.
    """
    texts = [t if isinstance(t, str) else str(t) for t in texts]

    if prefer_sentence_transformer:
        model = _load_st_model()
        if model is not None:
            try:
                vecs = model.encode(
                    texts, batch_size=32, show_progress_bar=False,
                    convert_to_numpy=True, normalize_embeddings=True
                )
                return vecs.astype(np.float32), "sentence-transformers/all-MiniLM-L6-v2"
            except Exception:
                # Deliberate best-effort: fall through to the hashing backend.
                pass

    hv = HashingVectorizer(n_features=768, alternate_sign=False, norm=None)
    vecs = hv.transform(texts).toarray().astype(np.float32)
    # The epsilon keeps all-zero rows (e.g. empty strings) from dividing by zero.
    norms = np.linalg.norm(vecs, axis=1, keepdims=True) + 1e-9
    return vecs / norms, "HashingVectorizer(768d) fallback"
79
+
80
+ # -----------------------------
81
+ # Text ingestion / splitting
82
+ # -----------------------------
83
def _basic_sentence_split(text: str) -> List[str]:
    """Split *text* into non-empty, stripped sentence-like pieces.

    A split happens at any run of newline / carriage-return characters and at
    whitespace that follows ``.``, ``!`` or ``?``.
    """
    pieces = re.split(r'[\n\r]+|(?<=[\.\!\?])\s+', text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]
91
+
92
def read_txt_bytes(b: bytes) -> str:
    """Decode raw file bytes to text.

    UTF-8 is tried first; a leading UTF-8 byte-order mark is dropped so it
    does not end up inside the first text unit. Bytes that are not valid
    UTF-8 are decoded as Latin-1, which maps every byte and cannot fail.
    """
    try:
        return b.decode("utf-8-sig")
    except UnicodeDecodeError:
        return b.decode("latin-1")
97
+
98
def read_docx_bytes(b: bytes) -> List[str]:
    """Return the non-empty, stripped paragraph texts of a DOCX file.

    Raises:
        RuntimeError: if python-docx could not be imported at module load.
    """
    if not _DOCX_OK:
        raise RuntimeError("python-docx not installed in this Space.")
    doc = Document(io.BytesIO(b))
    stripped = (p.text.strip() for p in doc.paragraphs)
    # After strip() a whitespace-only paragraph is the empty string, so a
    # plain truthiness test is sufficient.
    return [text for text in stripped if text]
105
+
106
def to_units(raw_text: str, mode: str) -> List[str]:
    """Split *raw_text* into training units.

    With ``mode == "sentences"`` the text is split by
    ``_basic_sentence_split``; any other mode splits on blank lines
    (paragraphs). Empty or whitespace-only input yields an empty list.
    """
    raw_text = raw_text.strip()
    if not raw_text:
        return []
    if mode == "sentences":
        return _basic_sentence_split(raw_text)
    return [p.strip() for p in re.split(r"\n\s*\n+", raw_text) if p.strip()]
114
+
115
+ # -----------------------------
116
+ # Demo corpus (big enough to always train)
117
+ # -----------------------------
118
+ DEMO_CORPUS = """
119
+ In the beginning, people stored knowledge in libraries, then in databases, and now in neural networks.
120
+ Compression isn’t just saving space — it’s choosing what matters.
121
+ A constellation is a pattern you can navigate.
122
+ Entropy is a measure of surprise, and learning is surprise turning into structure.
123
+
124
+ A system that learns from compressed data never needs the original.
125
+ It doesn’t memorize pixels; it memorizes geometry.
126
+ It doesn’t hoard text; it extracts signals.
127
+ The question isn’t “Can it compress?” but “Can it learn after compressing?”
128
+
129
+ Investors love seeing systems move.
130
+ They love curves that fall.
131
+ They love maps that cluster.
132
+ They love a demo that feels alive.
133
+
134
+ This demo builds a codec from your dataset,
135
+ then trains a model exclusively on the codec’s trading card.
136
+ No raw text is used during training.
137
+ Only the trading card exists.
138
+
139
+ We call the clusters constellations.
140
+ We call the structure harvestable.
141
+ We call the drop in entropy visible proof.
142
+ """
143
+
144
+ # -----------------------------
145
+ # CHR core
146
+ # -----------------------------
147
def softmax(x, axis=-1):
    """Numerically stable softmax of *x* along *axis*.

    The per-axis maximum is subtracted before exponentiating to avoid
    overflow; a small epsilon in the denominator guards against division by
    zero.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    ex = np.exp(shifted)
    return ex / (np.sum(ex, axis=axis, keepdims=True) + 1e-9)
151
+
152
def global_range_entropy(p: np.ndarray) -> float:
    """Entropy (in nats) of the column-wise mean of *p*.

    *p* is averaged over axis 0 and the entropy of the resulting vector is
    returned. Entries are clipped to at least 1e-12 so ``log`` stays finite.
    """
    mean_p = np.clip(p.mean(axis=0), 1e-12, None)
    return float(-(mean_p * np.log(mean_p)).sum())
156
+
157
def soft_slab_entropy(z: np.ndarray, U: np.ndarray, bins: int = 8, tau: float = 5.0) -> float:
    """Mean soft-histogram entropy of the projections of *z* onto rows of *U*.

    For each row of *U*, the projections ``z @ U.T`` are softly assigned to
    *bins* evenly spaced centers between their min and max (softmax over
    ``-tau * squared distance``); the entropy of the averaged assignment is
    computed. Directions whose projections are non-finite or span less than
    1e-6 contribute 0.0.

    Returns:
        The mean entropy over all rows of *U*, or 0.0 when *z* has no rows
        or *U* has no rows.
    """
    if z.shape[0] == 0:
        # min()/max() of an empty projection would raise; no data, no entropy.
        return 0.0

    t = z @ U.T
    entropies = []
    for j in range(U.shape[0]):
        tj = t[:, j]
        tmin, tmax = float(tj.min()), float(tj.max())
        if not np.isfinite(tmin) or not np.isfinite(tmax) or tmax - tmin < 1e-6:
            entropies.append(0.0)
            continue
        centers = np.linspace(tmin, tmax, bins)
        dist2 = (tj[:, None] - centers[None, :]) ** 2
        weights = softmax(-tau * dist2, axis=1)
        hist = np.clip(weights.mean(axis=0), 1e-12, None)
        entropies.append(float(-(hist * np.log(hist)).sum()))
    return float(np.mean(entropies)) if entropies else 0.0
175
+
176
def kmeans_plus_plus_init(z: np.ndarray, K: int, rng: np.random.RandomState) -> np.ndarray:
    """Pick *K* initial anchors from the rows of *z*, k-means++ style.

    The first anchor is a uniformly random row. Each further anchor is drawn
    with probability proportional to ``1 - cosine`` to the closest anchor
    chosen so far, where the cosine is taken as the raw dot product clipped
    to [-1, 1]. The stacked anchors are L2-normalized before being returned.

    Returns:
        Array with *K* rows and the same number of columns as *z*.
    """
    N = z.shape[0]
    first = rng.randint(0, N)
    centers = [z[first]]
    # Distances are floored at 1e-12 so every row keeps a non-zero weight.
    d2 = np.clip(1.0 - np.clip(z @ centers[0], -1.0, 1.0), 1e-12, None)

    for _ in range(1, K):
        total = d2.sum()
        if not np.isfinite(total) or total <= 0:
            probs = np.full(N, 1.0 / N)
        else:
            probs = np.clip(d2 / total, 0.0, None)
            probs = probs / (probs.sum() + 1e-12)
        next_idx = rng.choice(N, p=probs)
        centers.append(z[next_idx])

        cos_new = np.clip(z @ z[next_idx], -1.0, 1.0)
        d2 = np.minimum(d2, np.clip(1.0 - cos_new, 1e-12, None))

    U = np.stack(centers, axis=0)
    return U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-9)
200
+
201
def chr_optimize(z: np.ndarray, K: int = 8, iters: int = 30, beta: float = 12.0,
                 bins: int = 8, tau: float = 5.0, seed: int = 42):
    """Fit *K* unit-norm anchors to *z* with soft-assignment updates.

    Anchors are initialised with ``kmeans_plus_plus_init`` when there are at
    least *K* rows, otherwise by wrapping the rows of *z* to fill *K* slots.
    Each iteration computes soft assignments ``softmax(beta * z @ U.T)`` and
    replaces every anchor by the assignment-weighted mean of *z*,
    re-normalized to unit length.

    Returns:
        ``(U, p, Hg_traj, Hs_traj)``: the final anchors, the final soft
        assignments, and the global-range / slab entropy trajectories. Both
        trajectories have ``iters + 1`` entries (initial state first).
    """
    rng = np.random.RandomState(seed)
    N = z.shape[0]
    if N >= K:
        U = kmeans_plus_plus_init(z, K, rng)
    else:
        U = np.pad(z, ((0, max(0, K - N)), (0, 0)), mode="wrap")[:K]
    U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-9)

    p0 = softmax(beta * (z @ U.T), axis=1)
    Hg_traj = [global_range_entropy(p0)]
    Hs_traj = [soft_slab_entropy(z, U, bins=bins, tau=tau)]

    for _ in range(iters):
        p = softmax(beta * (z @ U.T), axis=1)
        numer = p.T @ z
        denom = p.sum(axis=0)[:, None] + 1e-9
        U = numer / denom
        U = U / (np.linalg.norm(U, axis=1, keepdims=True) + 1e-9)
        # Note: the global entropy logged here uses the assignments computed
        # before this iteration's anchor update; the slab entropy uses the
        # updated anchors.
        Hg_traj.append(global_range_entropy(p))
        Hs_traj.append(soft_slab_entropy(z, U, bins=bins, tau=tau))

    p = softmax(beta * (z @ U.T), axis=1)
    return U, p, np.array(Hg_traj), np.array(Hs_traj)
226
+
227
def compute_mhep(Hg_traj: np.ndarray, Hs_traj: np.ndarray, K: int, bins: int, w_g: float = 0.7, w_s: float = 0.3) -> float:
    """Score the entropy drop of an optimisation run on a 0..100 scale.

    The drop between the first and last entry of each trajectory (negative
    drops count as 0) is divided by ``log(max(K, 2))`` for the global
    trajectory and ``log(max(bins, 2))`` for the slab trajectory. The two
    ratios are combined with weights *w_g* and *w_s*, scaled by 100 and
    clipped to [0, 100]. Returns 0.0 if either trajectory has fewer than
    two entries.
    """
    if len(Hg_traj) < 2 or len(Hs_traj) < 2:
        return 0.0
    max_hg = math.log(max(K, 2))
    max_hs = math.log(max(bins, 2))
    drop_g = max(0.0, float(Hg_traj[0] - Hg_traj[-1])) / (max_hg + 1e-9)
    drop_s = max(0.0, float(Hs_traj[0] - Hs_traj[-1])) / (max_hs + 1e-9)
    score = 100.0 * (w_g * drop_g + w_s * drop_s)
    return float(np.clip(score, 0.0, 100.0))
235
+
236
+ # -----------------------------
237
+ # CHR → discrete "compressed" byte stream (codes.bin payload)
238
+ # -----------------------------
239
def make_radial_bins(radials: np.ndarray, B: int = 64) -> np.ndarray:
    """Return ``B + 1`` quantile-based bin edges for *radials* as float32.

    Edges are the evenly spaced quantiles of *radials*. Any edge that does
    not exceed its predecessor (ties in the data) is bumped to the
    predecessor plus 1e-6, so edges are increasing before the float32 cast.
    """
    edges = np.quantile(radials, np.linspace(0, 1, B + 1))
    # Sequential on purpose: each bump depends on the possibly-bumped
    # previous edge.
    for i in range(1, len(edges)):
        if edges[i] <= edges[i - 1]:
            edges[i] = edges[i - 1] + 1e-6
    return edges.astype(np.float32)
245
+
246
def quantize_radial(r: float, edges: np.ndarray) -> int:
    """Return the index of the bin of *edges* that contains *r*.

    Values below the first edge map to bin 0 and values at or above the last
    edge map to the last bin (``len(edges) - 2``).
    """
    raw_bin = np.searchsorted(edges, r, side="right") - 1
    last_bin = len(edges) - 2
    return int(np.clip(raw_bin, 0, last_bin))
249
+
250
def pack_codes_to_bytes(labels: np.ndarray, bins: np.ndarray) -> bytes:
    """Interleave per-unit (label, bin) pairs into a byte string.

    Each unit contributes two bytes: its label followed by its bin. If the
    inputs differ in length, the extra tail of the longer one is ignored.

    Raises:
        ValueError: if any label or bin is outside 0..255. Such a value
            cannot be stored in one byte; masking it would silently map it
            onto a different code.
    """
    n = min(len(labels), len(bins))
    pairs = np.empty((n, 2), dtype=np.int64)
    pairs[:, 0] = np.asarray(labels)[:n]
    pairs[:, 1] = np.asarray(bins)[:n]
    if n and (pairs.min() < 0 or pairs.max() > 0xFF):
        raise ValueError("labels and bins must be in the range 0..255 to fit one byte each.")
    # Row-major flattening yields label0, bin0, label1, bin1, ...
    return pairs.astype(np.uint8).tobytes()
256
+
257
def save_codes_and_codec(code_bytes: bytes, codec: Dict, out_dir: str) -> Tuple[str, str]:
    """Write ``codes.bin`` and ``codec.json`` into *out_dir*.

    ``codes.bin`` is the 4-byte magic ``CHRC``, a little-endian u32 version
    (1), then *code_bytes*. ``codec.json`` is *codec* dumped as indented
    UTF-8 JSON. *out_dir* is created if it does not exist.

    Returns:
        ``(bin_path, meta_path)``.
    """
    os.makedirs(out_dir, exist_ok=True)
    bin_path = os.path.join(out_dir, "codes.bin")
    meta_path = os.path.join(out_dir, "codec.json")

    with open(bin_path, "wb") as f:
        f.write(b"CHRC" + struct.pack("<I", 1) + code_bytes)
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(codec, f, indent=2)
    return bin_path, meta_path
268
+
269
+ # -----------------------------
270
+ # Trading Card layer (THE NEW PIECE)
271
+ # -----------------------------
272
+ CARD_MAGIC = b"TCAR" # Trading Card magic
273
+ CARD_VER = 1
274
+
275
def _sha256_hex(b: bytes) -> str:
    """Return the SHA-256 digest of *b* as a lowercase hex string."""
    return hashlib.sha256(b).hexdigest()
277
+
278
def _crc32_u32(b: bytes) -> int:
    """Return the CRC-32 of *b* as an unsigned 32-bit integer."""
    return zlib.crc32(b) & 0xFFFFFFFF
280
+
281
def pack_trading_card_payload(code_bytes: bytes, codec: Dict, title: str = "CHR Trading Card") -> bytes:
    """
    Build the self-contained binary payload that lives inside the card's pixels.

    Layout:
    magic(4) | ver(u32) | header_len(u32) | header_json | code_len(u32) | code_bytes

    All integers are little-endian. The JSON header carries the title, a
    subset of the codec settings, and the length, CRC-32 and SHA-256 of
    code_bytes so a reader can verify what it extracted.
    """
    codec_keys = ("backend", "K", "radial_bins", "iters", "beta", "slab_bins", "tau", "seed")
    header = {
        "title": title,
        "codec": {key: codec.get(key) for key in codec_keys},
        "units_count": codec.get("units_count"),
        "bytes_per_unit": codec.get("bytes_per_unit"),
        "code_len": int(len(code_bytes)),
        "crc32": int(_crc32_u32(code_bytes)),
        "sha256": _sha256_hex(code_bytes),
    }
    header_json = json.dumps(header, ensure_ascii=False).encode("utf-8")

    return b"".join((
        CARD_MAGIC,
        struct.pack("<I", CARD_VER),
        struct.pack("<I", len(header_json)),
        header_json,
        struct.pack("<I", len(code_bytes)),
        bytes(code_bytes),
    ))
317
+
318
def unpack_trading_card_payload(payload: bytes) -> Tuple[Dict, bytes]:
    """
    Parse a trading-card payload and return (header_dict, code_bytes).

    Only the structure is validated here (magic, version, lengths); the
    checksums stored in the header are not verified by this function.

    Raises:
        ValueError: if the payload is too small, has the wrong magic or
            version, is truncated, or its header is not valid UTF-8 JSON.
    """
    if len(payload) < 16:
        raise ValueError("Card payload too small.")
    if payload[:4] != CARD_MAGIC:
        raise ValueError("Card magic not found.")
    ver, hlen = struct.unpack_from("<II", payload, 4)
    if ver != CARD_VER:
        raise ValueError(f"Unsupported card version: {ver}")

    off = 12
    # The header must be followed by at least the 4-byte code length; without
    # this check a short payload would surface as struct.error, not ValueError.
    if off + hlen + 4 > len(payload):
        raise ValueError("Card payload truncated.")
    header = json.loads(payload[off:off + hlen].decode("utf-8"))
    off += hlen

    (clen,) = struct.unpack_from("<I", payload, off)
    off += 4
    code_bytes = payload[off:off + clen]
    if len(code_bytes) != clen:
        raise ValueError("Card payload truncated.")
    return header, code_bytes
340
+
341
def bytes_to_data_slab_image(payload: bytes, slab_w: int = 256) -> np.ndarray:
    """
    Convert payload bytes into a 2D uint8 (grayscale) slab, row-major.

    The slab is slab_w pixels wide and has just enough rows to hold the
    payload; the unused tail of the last row is zero-filled.

    Raises:
        ValueError: if slab_w is less than 1.
    """
    w = int(slab_w)
    if w < 1:
        raise ValueError(f"slab_w must be >= 1, got {slab_w}")
    data = np.frombuffer(payload, dtype=np.uint8)
    h = int(math.ceil(len(data) / w))
    # Allocate zeroed storage and copy in: the result is writable and the
    # padding is already in place.
    flat = np.zeros(h * w, dtype=np.uint8)
    flat[:len(data)] = data
    return flat.reshape(h, w)
354
+
355
def data_slab_image_to_bytes(slab: np.ndarray, orig_len: int) -> bytes:
    """Flatten *slab* row-major and return its first *orig_len* bytes.

    *orig_len* trims the zero padding that was added to fill the last row.
    """
    flat = slab.astype(np.uint8).ravel()
    return flat[:orig_len].tobytes()
358
+
359
def make_holo_front(U: np.ndarray, K: int, W: int, H: int, seed: int = 0) -> np.ndarray:
    """
    Create a holographic-looking RGB background from anchors U.

    Up to 16 anchors are picked without replacement; for each colour channel
    two fixed coordinates of every picked anchor orient a cosine wave with a
    random frequency and phase. The summed waves are min-max normalized per
    channel and darkened toward the corners by a radial vignette.
    Deterministic for a given seed. This is "sizzle"; it doesn't contain
    the payload.

    Returns:
        uint8 array of shape (H, W, 3).
    """
    rng = np.random.RandomState(int(seed) + 123)
    d = U.shape[1]
    n = min(K, 16)
    # n never exceeds K, so `replace` is always False here.
    idx = rng.choice(K, size=n, replace=K < n)
    V = U[idx]  # [n, d]

    # Coordinate grid scaled to [-1, 1] on both axes.
    yy, xx = np.mgrid[0:H, 0:W].astype(np.float32)
    xx = (xx / max(1, W - 1) - 0.5) * 2.0
    yy = (yy / max(1, H - 1) - 0.5) * 2.0

    freqs = rng.uniform(2.0, 10.0, size=n).astype(np.float32)
    phases = rng.uniform(0, 2 * np.pi, size=n).astype(np.float32)

    out = np.zeros((H, W, 3), dtype=np.float32)
    for c in range(3):
        # Each channel reads a different pair of anchor coordinates.
        col_a = (c * 7) % d
        col_b = (c * 11 + 3) % d
        acc = np.zeros((H, W), dtype=np.float32)
        for i in range(n):
            a = float(V[i, col_a])
            b = float(V[i, col_b])
            acc += np.cos(freqs[i] * (a * xx + b * yy) + phases[i])
        # normalize 0..1
        out[..., c] = (acc - acc.min()) / (acc.max() - acc.min() + 1e-9)

    # subtle radial vignette
    rr = np.sqrt(xx * xx + yy * yy)
    vignette = np.clip(1.1 - 0.35 * rr, 0.6, 1.1)
    out *= vignette[..., None]
    out = np.clip(out, 0.0, 1.0)
    return (out * 255.0).astype(np.uint8)
397
+
398
def compose_trading_card(front_rgb: np.ndarray, slab_gray: np.ndarray, title: str, subtitle: str) -> np.ndarray:
    """
    Make a single card image:
    - top: holo front with title/subtitle overlay
    - middle: 8-pixel dark separator
    - bottom: data slab grid (this is where bytes live)
    Output: RGB uint8 image.

    The text overlay and frame are best-effort: if drawing fails for any
    reason, the card is returned without them.
    """
    Hf, Wf, _ = front_rgb.shape
    slab_h, slab_w = slab_gray.shape

    if slab_w != Wf:
        # NOTE(review): nearest-neighbour resizing changes the stored bytes,
        # so a payload cannot be read back from a resized slab. Callers that
        # need extraction must pass a slab whose width equals the front's.
        slab_img = Image.fromarray(slab_gray)
        slab_img = slab_img.resize((Wf, slab_h), resample=Image.NEAREST)
        slab_gray = np.array(slab_img, dtype=np.uint8)

    # Replicate the grayscale slab into all three channels.
    slab_rgb = np.stack([slab_gray] * 3, axis=-1)
    sep = np.full((8, Wf, 3), 16, dtype=np.uint8)
    card = np.concatenate([front_rgb, sep, slab_rgb], axis=0)

    # Overlay text on front using PIL (fast and dependency-light)
    try:
        from PIL import ImageDraw, ImageFont
        pil = Image.fromarray(card)
        draw = ImageDraw.Draw(pil)
        font = ImageFont.load_default()

        # Title bar and title/subtitle text
        draw.rectangle([0, 0, Wf, 26], fill=(0, 0, 0))
        draw.text((10, 6), title, fill=(255, 255, 255), font=font)
        draw.text((10, 34), subtitle, fill=(255, 255, 255), font=font)

        # "foil" frame around the front region
        draw.rectangle([4, 4, Wf - 5, Hf - 5], outline=(220, 220, 255), width=2)

        card = np.array(pil, dtype=np.uint8)
    except Exception:
        # Deliberate best-effort: decoration must never block card creation.
        pass

    return card
446
+
447
def save_png(arr: np.ndarray, path: str):
    """Save image array *arr* to *path* in PNG format."""
    Image.fromarray(arr).save(path, format="PNG")
449
+
450
def load_png(path: str) -> np.ndarray:
    """Load the image at *path* as an RGB uint8 array.

    The file is opened in a ``with`` block so its handle is closed as soon
    as the pixels have been copied out.
    """
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)
452
+
453
def extract_payload_from_card(card_rgb: np.ndarray, slab_top: int, slab_w: int, payload_len: int) -> bytes:
    """
    Extract payload bytes from the card's bottom data slab.

    Every row from slab_top downward is read; the first (red) channel is
    taken as the byte value and the rows are flattened row-major, then
    trimmed to payload_len.

    slab_w is accepted for interface compatibility but not used: the slab is
    assumed to span the full card width.
    """
    slab_gray = card_rgb[slab_top:, :, 0].astype(np.uint8)
    return data_slab_image_to_bytes(slab_gray, payload_len)
463
+
464
+ # -----------------------------
465
+ # Visuals (existing + card-specific)
466
+ # -----------------------------
467
def plot_entropy(Hg, Hs, out_path):
    """Plot both entropy trajectories against iteration and save to *out_path*.

    The figure is always closed, even if plotting or saving raises.
    """
    fig = plt.figure(figsize=(6, 4))
    try:
        plt.plot(Hg, label="Global range entropy")
        plt.plot(Hs, label="Slab entropy")
        plt.xlabel("Iteration")
        plt.ylabel("Entropy")
        plt.title("Entropy drops during CHR compression")
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)
477
+
478
def plot_constellation_map(z, U, labels, out_path):
    """Scatter the units and anchors in 2D and save the plot to *out_path*.

    When *z* has more than two columns, a 2-component PCA is fitted on *z*
    and applied to both *z* and *U*; otherwise the arrays are plotted as-is.
    Units are coloured by *labels*; anchors are drawn as stars. The figure
    is always closed, even if plotting or saving raises.
    """
    if z.shape[1] > 2:
        pca = PCA(n_components=2, random_state=0)
        Z2 = pca.fit_transform(z)
        U2 = pca.transform(U)
    else:
        Z2, U2 = z, U

    fig = plt.figure(figsize=(6, 5))
    try:
        plt.scatter(Z2[:, 0], Z2[:, 1], s=14, alpha=0.8, c=labels)
        plt.scatter(U2[:, 0], U2[:, 1], marker="*", s=200)
        plt.title("Constellation map (compressed geometry)")
        plt.xlabel("PC1")
        plt.ylabel("PC2")
        plt.tight_layout()
        plt.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)
493
+
494
def plot_training_curves(losses, ppls, out_path):
    """Plot logged loss and perplexity per checkpoint and save to *out_path*.

    The figure is always closed, even if plotting or saving raises.
    """
    fig = plt.figure(figsize=(6, 4))
    try:
        plt.plot(losses, label="Loss")
        plt.plot(ppls, label="Perplexity")
        plt.xlabel("Checkpoint")
        plt.title("Learning on trading card pixels")
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)
504
+
505
def plot_rollout_tracks(seq_bytes: List[int], out_path, title="Rollout (byte tokens)"):
    """Plot a sampled byte sequence against step and save to *out_path*.

    The y-axis is fixed to (-2, 260) so all byte values 0..255 are visible.
    The figure is always closed, even if plotting or saving raises.
    """
    fig = plt.figure(figsize=(8, 3.6))
    try:
        plt.plot(seq_bytes, label="Byte value")
        plt.ylim(-2, 260)
        plt.xlabel("Step")
        plt.title(title)
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)
514
+
515
def plot_before_after_tracks(before_bytes: List[int], after_bytes: List[int], out_path):
    """Plot untrained vs. trained rollouts side by side and save to *out_path*.

    Both panels share the fixed y-range (-2, 260). The figure is always
    closed, even if plotting or saving raises.
    """
    panels = (
        (before_bytes, "BEFORE (untrained)"),
        (after_bytes, "AFTER (trained)"),
    )
    fig = plt.figure(figsize=(10, 4))
    try:
        for position, (series, panel_title) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            plt.plot(series, label="Byte value")
            plt.title(panel_title)
            plt.ylim(-2, 260)
            plt.legend()

        plt.suptitle("Rollout comparison (trained on card pixels)")
        plt.tight_layout()
        plt.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)
533
+
534
def make_card_tilt_gif(card_rgb: np.ndarray, out_path: str, frames: int = 24, fps: int = 12):
    """
    Cheap holo tilt effect: shift colour channels + a moving horizontal
    brightness gradient over the top 45% of the card. Pure sizzle.

    frames is clamped to 8..48 and fps to 6..24. The rest of the card
    (including the data slab rows below the affected region) is copied
    unchanged into every frame. The animation is written to out_path.
    """
    H, W, _ = card_rgb.shape
    frames = int(max(8, min(frames, 48)))
    fps = int(max(6, min(fps, 24)))

    # Frame-independent values, computed once.
    front_h = int(H * 0.45)
    col_phase = (np.arange(W) / max(1, W - 1)) * 2 * np.pi  # one value per column

    imgs = []
    for t in range(frames):
        a = (t / frames) * 2 * np.pi
        dx = int(2 + 3 * np.sin(a))
        dy = int(2 + 3 * np.cos(a))

        # int16 working copy so the scaled values can be clipped afterwards
        img = card_rgb.astype(np.int16)

        # brightness gradient; identical for every row, so broadcast over rows
        grad = (0.85 + 0.15 * np.sin(a + col_phase)).astype(np.float32)[None, :]

        # channel shift: red rolls horizontally, green vertically, blue stays
        r = np.roll(img[:front_h, :, 0], shift=dx, axis=1)
        g = np.roll(img[:front_h, :, 1], shift=dy, axis=0)
        b = img[:front_h, :, 2]

        img[:front_h, :, 0] = (r * grad).astype(np.int16)
        img[:front_h, :, 1] = (g * grad).astype(np.int16)
        img[:front_h, :, 2] = (b * grad).astype(np.int16)

        imgs.append(np.clip(img, 0, 255).astype(np.uint8))

    imageio.mimsave(out_path, imgs, fps=fps)
569
+
570
+ # -----------------------------
571
+ # Training: byte-model reads ONLY the trading card PNG pixels
572
+ # -----------------------------
573
+ import torch
574
+ import torch.nn as nn
575
+ from torch.utils.data import Dataset, DataLoader
576
+
577
class CardByteDataset(Dataset):
    """
    Produces next-byte prediction windows from the trading card's payload bytes.

    The card PNG is read once, at construction: the first channel of every
    row from slab_top downward is flattened row-major and the first
    payload_len values are kept as a LongTensor. Each item is a pair (x, y)
    of block_size-long windows where y is x shifted one byte to the right.
    """
    def __init__(self, card_png_path: str, payload_len: int, slab_top: int, block_size: int = 128):
        self.card_png_path = card_png_path
        self.payload_len = int(payload_len)
        self.slab_top = int(slab_top)
        self.block_size = int(block_size)

        card = load_png(card_png_path)
        flat = card[self.slab_top:, :, 0].astype(np.uint8).ravel()
        # Direct numpy -> tensor conversion; int64 is what nn.Embedding and
        # CrossEntropyLoss expect for indices/targets.
        self.bytes = torch.from_numpy(flat[:self.payload_len].astype(np.int64))

    def __len__(self):
        # The last valid start index is len - block_size - 1 (the target
        # window needs one extra byte), which gives len - block_size windows.
        return max(0, len(self.bytes) - self.block_size)

    def __getitem__(self, idx):
        x = self.bytes[idx:idx + self.block_size]
        y = self.bytes[idx + 1:idx + self.block_size + 1]
        return x, y
601
+
602
class TinyByteTransformer(nn.Module):
    """
    FAST investor demo model: small and quick on CPU/GPU.

    Token and learned position embeddings are summed, passed through a stack
    of TransformerEncoder layers under a causal mask, and projected to
    vocab_size logits per position.
    """
    def __init__(self, vocab_size=256, d_model=128, n_layers=2, n_heads=4, block_size=128):
        super().__init__()
        self.tok = nn.Embedding(vocab_size, d_model)
        self.pos = nn.Embedding(block_size, d_model)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_heads, dim_feedforward=4*d_model,
            dropout=0.1, batch_first=True
        )
        self.tr = nn.TransformerEncoder(enc_layer, num_layers=n_layers)
        self.lm = nn.Linear(d_model, vocab_size)
        self.block_size = int(block_size)

    def forward(self, x):
        """Return logits of shape (B, T, vocab_size) for token ids x of shape (B, T).

        T must not exceed block_size, the size of the position table.
        """
        B, T = x.shape
        pos = torch.arange(T, device=x.device).unsqueeze(0).expand(B, T)
        h = self.tok(x) + self.pos(pos)
        # True above the diagonal: position i may not attend to positions > i.
        mask = torch.triu(torch.ones(T, T, device=x.device), diagonal=1).bool()
        h = self.tr(h, mask=mask)
        return self.lm(h)
625
+
626
@torch.no_grad()
def sample_bytes(model, start: List[int], steps: int, device: str = "cpu", temperature: float = 1.0) -> List[int]:
    """Autoregressively sample *steps* byte tokens after the *start* prompt.

    At each step the last ``model.block_size`` tokens are fed to the model,
    the final position's logits are divided by *temperature* (floored at
    1e-6) and one token is drawn from the resulting softmax distribution.
    The model is switched to eval mode and left there.

    Returns:
        A new list: the prompt followed by the sampled tokens.

    Raises:
        ValueError: if *start* is empty while ``steps > 0`` (the model needs
            at least one token of context).
    """
    model.eval()
    seq = list(start)
    steps = int(steps)
    if steps > 0 and not seq:
        raise ValueError("start must contain at least one token.")

    temp = max(1e-6, float(temperature))
    for _ in range(steps):
        context = torch.tensor(seq[-model.block_size:], dtype=torch.long, device=device).unsqueeze(0)
        logits = model(context)[0, -1] / temp
        probs = torch.softmax(logits, dim=-1)
        seq.append(int(torch.multinomial(probs, num_samples=1).item()))
    return seq
638
+
639
def train_on_card_png(card_png_path: str,
                      payload_len: int,
                      slab_top: int,
                      steps: int = 250,
                      batch_size: int = 32,
                      block_size: int = 128,
                      lr: float = 5e-4,
                      device: str = "cpu",
                      log_every: int = 25):
    """Train a TinyByteTransformer on the bytes stored in a trading card PNG.

    Windows come from ``CardByteDataset``; the model is optimised with AdamW
    and cross-entropy on next-byte prediction for *steps* steps, with the
    gradient norm clipped to 1.0. Every *log_every* steps the current batch
    loss and its exponential (perplexity) are recorded.

    Returns:
        ``(model, losses, ppls)``.

    Raises:
        RuntimeError: if the payload yields no training window for the given
            *block_size*.
    """
    ds = CardByteDataset(card_png_path, payload_len=payload_len, slab_top=slab_top, block_size=block_size)
    n_windows = len(ds)
    if n_windows <= 0:
        raise RuntimeError(f"Card payload too small for block_size={block_size}. Reduce block_size or increase data.")

    # Dropping the last partial batch would leave nothing when there are
    # fewer windows than one batch, so only drop it when a full batch exists.
    drop_last = n_windows >= batch_size
    dl = DataLoader(ds, batch_size=batch_size, shuffle=True, drop_last=drop_last)
    it = iter(dl)

    model = TinyByteTransformer(block_size=block_size).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    losses, ppls = [], []
    steps = int(steps)
    log_every = max(1, int(log_every))

    model.train()
    for step in range(1, steps + 1):
        try:
            x, y = next(it)
        except StopIteration:
            # Epoch exhausted: restart the (reshuffled) loader.
            it = iter(dl)
            x, y = next(it)

        x, y = x.to(device), y.to(device)
        logits = model(x)
        # Flatten (B, T, V) -> (B*T, V); V is taken from the logits rather
        # than hard-coded.
        loss = loss_fn(logits.reshape(-1, logits.size(-1)), y.reshape(-1))

        opt.zero_grad(set_to_none=True)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()

        if step % log_every == 0:
            detached = loss.detach()
            losses.append(float(detached.cpu().item()))
            ppls.append(float(torch.exp(detached).cpu().item()))

    return model, losses, ppls
690
+
691
+ # -----------------------------
692
+ # App state
693
+ # -----------------------------
694
+ STATE = {
695
+ "units": None,
696
+ "Z": None,
697
+ "U": None,
698
+ "labels": None,
699
+ "bins": None,
700
+ "bin_path": None,
701
+ "codec_path": None,
702
+ "codec": None,
703
+
704
+ "card_png_path": None,
705
+ "card_payload_len": None,
706
+ "card_slab_top": None,
707
+ "card_header": None,
708
+
709
+ "model": None,
710
+ }
711
+
712
def _bytes_from_upload(file_obj) -> Tuple[bytes, str]:
    """Read an uploaded file and return ``(content_bytes, base_name)``.

    *file_obj* may be a path string or an object with a ``name`` attribute
    holding a path. ``None`` yields ``(b"", "")``; anything that does not
    resolve to an existing path yields ``(b"", "upload")``.
    """
    if file_obj is None:
        return b"", ""

    if isinstance(file_obj, str):
        candidate = file_obj
    else:
        candidate = getattr(file_obj, "name", None)

    if candidate is not None and os.path.exists(candidate):
        return Path(candidate).read_bytes(), os.path.basename(candidate)
    return b"", "upload"
720
+
721
+ # -----------------------------
722
+ # Callbacks
723
+ # -----------------------------
724
def load_demo(units_mode: str):
    """Load the built-in demo corpus into ``STATE["units"]``.

    The corpus is repeated 80 times, split with ``to_units`` according to
    *units_mode*, and stored. Returns a Markdown status string.
    """
    raw = (DEMO_CORPUS.strip() + "\n\n") * 80
    units = [u.strip() for u in to_units(raw, units_mode) if u.strip()]
    STATE["units"] = units
    return f"Loaded **{len(units)}** demo units (built-in corpus)."
730
+
731
def ingest_file(file_obj, units_mode: str):
    """Read an uploaded .txt or .docx file into ``STATE["units"]``.

    Files whose name ends in ``.docx`` are read paragraph by paragraph;
    everything else is decoded as text. The text is split with ``to_units``
    and capped at the first 5000 units. Returns a Markdown status string;
    any error is reported in that string rather than raised.
    """
    try:
        b, name = _bytes_from_upload(file_obj)
        if not b:
            return "Upload a .txt or .docx file to begin."

        if name.lower().endswith(".docx"):
            raw = "\n\n".join(read_docx_bytes(b))
        else:
            raw = read_txt_bytes(b)

        units = [u.strip() for u in to_units(raw, units_mode) if u.strip()]
        units = units[:5000]

        STATE["units"] = units
        return f"Loaded **{len(units)}** units from **{name}**."
    except Exception as e:
        # UI callback boundary: surface the problem as a message.
        return f"Error ingesting file: {e}"
752
+
753
+ def compress_and_make_card(K, iters, beta, slab_bins, tau, seed, radial_bins,
754
+ card_width, front_height, title_text):
755
+ """
756
+ 1) CHR compress
757
+ 2) build codes.bin + codec.json
758
+ 3) build trading card PNG that embeds a full self-contained payload
759
+ 4) verify by extracting payload back from the PNG
760
+ """
761
+ try:
762
+ units = STATE.get("units")
763
+ if not units:
764
+ return "No units loaded. Upload or load demo corpus.", None, None, None, None, None, None
765
+
766
+ # --- CHR compression ---
767
+ Z, backend = embed_texts(units, prefer_sentence_transformer=True)
768
+ U, p, Hg, Hs = chr_optimize(
769
+ Z, K=int(K), iters=int(iters), beta=float(beta),
770
+ bins=int(slab_bins), tau=float(tau), seed=int(seed)
771
+ )
772
+ labels = p.argmax(axis=1).astype(np.int32)
773
+ proj = Z @ U.T
774
+ radials = proj[np.arange(len(units)), labels].astype(np.float32)
775
+
776
+ edges = make_radial_bins(radials, B=int(radial_bins))
777
+ bins_q = np.array([quantize_radial(float(radials[i]), edges) for i in range(len(units))], dtype=np.int32)
778
+
779
+ code_bytes = pack_codes_to_bytes(labels, bins_q)
780
+
781
+ # --- Save codes.bin + codec.json (for audit/download only) ---
782
+ out_dir = tempfile.mkdtemp()
783
+ codec = {
784
+ "backend": backend,
785
+ "K": int(K),
786
+ "radial_bins": int(radial_bins),
787
+ "iters": int(iters),
788
+ "beta": float(beta),
789
+ "slab_bins": int(slab_bins),
790
+ "tau": float(tau),
791
+ "seed": int(seed),
792
+ "U": U.tolist(),
793
+ "radial_edges": edges.tolist(),
794
+ "units_count": int(len(units)),
795
+ "bytes_per_unit": 2.0,
796
+ "total_bytes": int(len(code_bytes) + 8),
797
+ }
798
+ bin_path, codec_path = save_codes_and_codec(code_bytes, codec, out_dir)
799
+
800
+ # --- Build trading card payload ---
801
+ payload = pack_trading_card_payload(code_bytes=code_bytes, codec=codec, title=str(title_text).strip()[:120] or "CHR Trading Card")
802
+ payload_len = len(payload)
803
+
804
+ # --- Render data slab and holo front ---
805
+ card_w = int(card_width)
806
+ front_h = int(front_height)
807
+ slab = bytes_to_data_slab_image(payload, slab_w=card_w) # grayscale slab holding payload
808
+ slab_h = slab.shape[0]
809
+ # front with same width
810
+ front = make_holo_front(np.array(U, dtype=np.float32), K=int(K), W=card_w, H=front_h, seed=int(seed))
811
+
812
+ mhep = compute_mhep(Hg, Hs, K=int(K), bins=int(slab_bins))
813
+
814
+ subtitle = f"Units={len(units)} K={int(K)} Bytes={payload_len} CRC32={_crc32_u32(code_bytes):08x}"
815
+ card_rgb = compose_trading_card(front_rgb=front, slab_gray=slab, title=str(title_text).strip() or "CHR Trading Card", subtitle=subtitle)
816
+
817
+ # --- Save PNG ---
818
+ card_png_path = os.path.join(out_dir, "trading_card.png")
819
+ save_png(card_rgb, card_png_path)
820
+
821
+ # --- Determine slab top for extraction ---
822
+ sep_h = 8
823
+ slab_top = front_h + sep_h
824
+
825
+ # --- Verify by extracting payload back from PNG ---
826
+ card_loaded = load_png(card_png_path)
827
+ extracted = extract_payload_from_card(card_loaded, slab_top=slab_top, slab_w=card_w, payload_len=payload_len)
828
+ header2, code2 = unpack_trading_card_payload(extracted)
829
+
830
+ ok_crc = (_crc32_u32(code2) == int(header2["crc32"]))
831
+ ok_sha = (_sha256_hex(code2) == str(header2["sha256"]))
832
+ verified = (ok_crc and ok_sha and len(code2) == len(code_bytes))
833
+
834
+ # Save extra visuals
835
+ ent_plot = os.path.join(out_dir, "entropy.png")
836
+ map_plot = os.path.join(out_dir, "map.png")
837
+ plot_entropy(Hg, Hs, ent_plot)
838
+ plot_constellation_map(Z, U, labels, map_plot)
839
+
840
+ tilt_gif = os.path.join(out_dir, "card_tilt.gif")
841
+ # small tilt gif (fast)
842
+ make_card_tilt_gif(card_rgb, tilt_gif, frames=18, fps=12)
843
+
844
+ STATE.update({
845
+ "Z": Z, "U": U, "labels": labels, "bins": bins_q,
846
+ "bin_path": bin_path, "codec_path": codec_path, "codec": codec,
847
+ "card_png_path": card_png_path,
848
+ "card_payload_len": payload_len,
849
+ "card_slab_top": slab_top,
850
+ "card_header": header2,
851
+ "model": None
852
+ })
853
+
854
+ report = (
855
+ f"## Trading Card Generated\n"
856
+ f"- **Embedding backend:** `{backend}`\n"
857
+ f"- **Units:** **{len(units)}**\n"
858
+ f"- **Constellations (K):** **{int(K)}**\n"
859
+ f"- **Radial bins:** **{int(radial_bins)}**\n"
860
+ f"- **Card width:** **{card_w}px**\n"
861
+ f"- **Payload bytes inside card:** **{payload_len}**\n"
862
+ f"- **Code bytes (constellation+radial):** **{len(code_bytes)}**\n"
863
+ f"- **MHEP score:** **{mhep:.1f}%**\n"
864
+ f"\n### Integrity\n"
865
+ f"- CRC32 match: **{str(ok_crc)}**\n"
866
+ f"- SHA256 match: **{str(ok_sha)}**\n"
867
+ f"- **Verified:** {'✅ YES' if verified else '❌ NO'}\n"
868
+ f"\n### Investor-proof constraint\n"
869
+ f"Training can now read **only** the **PNG trading card pixels**."
870
+ )
871
+
872
+ header_json = json.dumps(header2, indent=2)
873
+
874
+ return report, ent_plot, map_plot, card_png_path, tilt_gif, bin_path, codec_path, header_json
875
+ except Exception as e:
876
+ return f"Error: {e}\n\n{traceback.format_exc()}", None, None, None, None, None, None, None
877
+
878
def train_from_card(train_steps, batch_size, block_size, lr, log_every,
                    temperature, rollout_steps, make_gif, gif_stride, gif_fps, gif_max_frames):
    """
    Train byte-level transformer on bytes extracted from the trading card PNG.
    Training uses ONLY the PNG pixels.

    The card path, payload length and slab offset are read from STATE, so a
    card must have been generated before this is called.

    Args:
        train_steps: Number of training steps (coerced with int()).
        batch_size: Requested batch size; may be lowered to fit the payload.
        block_size: Requested context length in bytes; may be lowered to fit the payload.
        lr: Learning rate (coerced with float()).
        log_every: Logging interval in steps, forwarded to train_on_card_png.
        temperature: Sampling temperature used for both rollouts.
        rollout_steps: Step count passed to sample_bytes; also the length of
            the sequence tail that gets plotted.
        make_gif: Truthy to also render a GIF of the trained rollout.
        gif_stride: Rollout steps revealed per GIF frame (clamped to >= 1).
        gif_fps: GIF frame rate (clamped to >= 6).
        gif_max_frames: Upper bound on GIF frames (clamped to >= 12).

    Returns:
        A 5-tuple ``(report_markdown, training_plot_path, compare_plot_path,
        gif_path_or_None, metrics_json)``. If anything raises, the first
        element holds the error text plus traceback and the rest are None.
    """
    try:
        card_png_path = STATE.get("card_png_path")
        payload_len = STATE.get("card_payload_len")
        slab_top = STATE.get("card_slab_top")

        if not card_png_path or not os.path.exists(card_png_path) or payload_len is None or slab_top is None:
            return "No trading card found. Generate a card first.", None, None, None, None

        device = "cuda" if torch.cuda.is_available() else "cpu"
        n_roll = int(rollout_steps)
        temp = float(temperature)

        # Re-load card and extract payload bytes (PNG pixels only)
        card_rgb = load_png(card_png_path)
        extracted = extract_payload_from_card(card_rgb, slab_top=slab_top, slab_w=card_rgb.shape[1], payload_len=int(payload_len))

        # Parse payload and verify again (still only from pixels)
        header2, code_bytes = unpack_trading_card_payload(extracted)
        ok_crc = (_crc32_u32(code_bytes) == int(header2["crc32"]))
        ok_sha = (_sha256_hex(code_bytes) == str(header2["sha256"]))
        verified = (ok_crc and ok_sha)

        # Auto-tune for speed + guarantee it runs
        L = len(extracted)  # training bytes are the full payload
        user_block = int(block_size)
        user_bs = int(batch_size)

        tuned_block = min(user_block, max(32, L // 10))
        tuned_block = min(tuned_block, max(32, L - 2))
        block_size = int(tuned_block)

        n_windows = max(0, L - block_size - 1)
        tuned_bs = min(user_bs, max(8, n_windows // 4)) if n_windows > 0 else 1
        batch_size = int(max(1, tuned_bs))

        # Start context for sampling: from the payload bytes (not codes.bin)
        start = list(extracted[:block_size])

        out_dir = os.path.dirname(card_png_path)

        # BEFORE rollout (untrained)
        untrained = TinyByteTransformer(block_size=block_size).to(device)
        before_seq = sample_bytes(untrained, start=start, steps=n_roll, device=device, temperature=temp)
        before_tail = before_seq[-n_roll:]
        before_plot = os.path.join(out_dir, "rollout_before.png")
        plot_rollout_tracks(before_tail, before_plot, title="BEFORE training (random)")

        # Train
        model, losses, ppls = train_on_card_png(
            card_png_path=card_png_path,
            payload_len=int(payload_len),
            slab_top=int(slab_top),
            steps=int(train_steps),
            batch_size=batch_size,
            block_size=block_size,
            lr=float(lr),
            device=device,
            log_every=int(log_every),
        )
        STATE["model"] = model

        train_plot = os.path.join(out_dir, "training.png")
        plot_training_curves(losses, ppls, train_plot)

        # AFTER rollout
        after_seq = sample_bytes(model, start=start, steps=n_roll, device=device, temperature=temp)
        after_tail = after_seq[-n_roll:]
        after_plot = os.path.join(out_dir, "rollout_after.png")
        plot_rollout_tracks(after_tail, after_plot, title="AFTER training (trained)")

        # Compare
        compare_plot = os.path.join(out_dir, "rollout_compare.png")
        plot_before_after_tracks(before_tail, after_tail, compare_plot)

        # Optional GIF (cap frames)
        gif_path = None
        if bool(make_gif):
            gif_path = _render_rollout_gif(
                after_tail,
                os.path.join(out_dir, "rollout.gif"),
                stride=max(1, int(gif_stride)),
                fps=max(6, int(gif_fps)),
                max_frames=max(12, int(gif_max_frames)),
            )

        # The trainer may not have logged anything (e.g. fewer steps than the
        # logging interval); report that instead of failing on losses[-1].
        final_loss = f"{losses[-1]:.4f}" if len(losses) > 0 else "n/a"
        final_ppl = f"{ppls[-1]:.2f}" if len(ppls) > 0 else "n/a"

        report = (
            f"## Training Complete (PNG-only)\n"
            f"- **Device:** `{device}`\n"
            f"- **Integrity (from pixels):** {'✅ Verified' if verified else '❌ Not verified'}\n"
            f"- **Payload bytes used for training:** **{L}**\n"
            f"- **Auto block_size:** **{block_size}** (requested {user_block})\n"
            f"- **Auto batch_size:** **{batch_size}** (requested {user_bs})\n"
            f"- **Steps:** **{int(train_steps)}** (logged every {int(log_every)})\n"
            f"- **Final logged loss:** **{final_loss}**\n"
            f"- **Final logged perplexity:** **{final_ppl}**\n"
            f"\n### What investors should notice\n"
            f"Perplexity falls while training from **a single trading card image**."
        )

        metrics = {"loss": losses, "ppl": ppls}
        return report, train_plot, compare_plot, gif_path, json.dumps(metrics, indent=2)
    except Exception as e:
        return f"Training error: {e}\n\n{traceback.format_exc()}", None, None, None, None


def _render_rollout_gif(seq, gif_path, stride, fps, max_frames):
    """Write a GIF that progressively reveals the byte track in *seq*.

    Returns *gif_path*, or None when *seq* is too short to produce any frame
    (nothing is written in that case).
    """
    frames = []
    # Slicing the range caps the number of frames without a manual counter.
    for t in range(10, len(seq), stride)[:max_frames]:
        buf = io.BytesIO()
        fig = plt.figure(figsize=(7, 3.6))
        try:
            plt.plot(seq[:t], linewidth=2)
            plt.ylim(-2, 260)
            plt.title("AFTER training — rollout from trading card pixels")
            plt.xlabel("Step")
            plt.ylabel("Byte value")
            plt.tight_layout()
            plt.savefig(buf, format="png", dpi=140)
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)
        buf.seek(0)
        frames.append(imageio.imread(buf))

    if not frames:
        return None
    imageio.mimsave(gif_path, frames, fps=fps)
    return gif_path
1003
+
1004
# -----------------------------
# Gradio UI
# -----------------------------
# Markdown text displayed at the top of the app via gr.Markdown(INTRO).
INTRO = """
# Trading Card Learning (Level 1)
**Pipeline:**
1) Compress dataset → **constellation/radial codes**
2) Pack codes into a **single PNG trading card**
3) Train a tiny model using **only the PNG pixels**

This is the “data becomes a trading card” end goal.
"""
1016
+
1017
# Three-tab Gradio app: ingest text, build the trading card, train from the card.
with gr.Blocks(title="Trading Card Learning (CHR)") as demo:
    gr.Markdown(INTRO)

    # Tab 1: choose the source text (uploaded file or built-in demo corpus).
    with gr.Tab("1) Ingest"):
        with gr.Row():
            file_in = gr.File(label="Upload .txt or .docx", file_types=[".txt", ".docx"])
            units_mode = gr.Radio(["paragraphs", "sentences"], value="sentences", label="Unit granularity")
        with gr.Row():
            ingest_btn = gr.Button("Load file", variant="primary")
            demo_btn = gr.Button("Load built-in demo corpus", variant="secondary")
        ingest_status = gr.Markdown("")
        # Both buttons report their result into the same status area.
        ingest_btn.click(ingest_file, inputs=[file_in, units_mode], outputs=[ingest_status])
        demo_btn.click(load_demo, inputs=[units_mode], outputs=[ingest_status])

    # Tab 2: compression settings and card generation.
    with gr.Tab("2) Compress → Trading Card"):
        with gr.Row():
            K = gr.Slider(2, 48, value=16, step=1, label="K (constellations)")
            iters = gr.Slider(5, 120, value=35, step=1, label="CHR iterations")
            beta = gr.Slider(2, 30, value=16, step=1, label="beta (assignment sharpness)")
        with gr.Row():
            slab_bins = gr.Slider(3, 16, value=8, step=1, label="slab bins (entropy measure)")
            tau = gr.Slider(1, 20, value=5, step=1, label="tau (slab softness)")
            radial_bins = gr.Slider(8, 256, value=64, step=8, label="radial bins (compression alphabet)")
            seed = gr.Slider(0, 9999, value=42, step=1, label="seed")
        with gr.Row():
            card_width = gr.Slider(128, 512, value=256, step=32, label="Card width (pixels)")
            front_height = gr.Slider(96, 320, value=160, step=16, label="Front (holo) height (pixels)")
            title_text = gr.Textbox(value="CHR Trading Card", label="Card title")

        compress_btn = gr.Button("Generate Trading Card PNG", variant="primary")
        compress_report = gr.Markdown("")
        with gr.Row():
            ent_img = gr.Image(label="Entropy during compression", type="filepath")
            map_img = gr.Image(label="Constellation map (PCA)", type="filepath")
        with gr.Row():
            card_img = gr.Image(label="Trading Card PNG (contains the data)", type="filepath")
            card_tilt = gr.Image(label="Holo tilt (GIF)", type="filepath")
        with gr.Row():
            codes_bin = gr.File(label="codes.bin (audit only)")
            codec_json = gr.File(label="codec.json (audit only)")
        card_header = gr.Code(label="Trading card header (from pixels)", language="json")

        # Inputs are handed to the handler positionally, so this order has to
        # match compress_and_make_card's parameter order; the eight outputs
        # line up with the eight values it returns.
        # NOTE(review): the handler's signature is defined earlier in the file — confirm the order.
        compress_btn.click(
            compress_and_make_card,
            inputs=[K, iters, beta, slab_bins, tau, seed, radial_bins, card_width, front_height, title_text],
            outputs=[compress_report, ent_img, map_img, card_img, card_tilt, codes_bin, codec_json, card_header]
        )

    # Tab 3: training hyper-parameters, rollout settings and result visuals.
    with gr.Tab("3) Train from Trading Card"):
        with gr.Row():
            train_steps = gr.Slider(50, 2000, value=250, step=50, label="training steps (fast demo default)")
            batch_size = gr.Slider(4, 128, value=32, step=4, label="batch size")
            block_size = gr.Slider(32, 256, value=128, step=16, label="sequence length (bytes)")
        with gr.Row():
            lr = gr.Number(value=5e-4, label="learning rate")
            log_every = gr.Slider(10, 200, value=25, step=5, label="log every (steps)")
            temperature = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="rollout temperature")
            rollout_steps = gr.Slider(40, 400, value=120, step=20, label="rollout steps (bytes)")
        with gr.Row():
            make_gif = gr.Checkbox(value=False, label="Generate rollout GIF (adds time)")
            gif_stride = gr.Slider(1, 12, value=5, step=1, label="GIF stride (higher = faster)")
            gif_fps = gr.Slider(6, 24, value=12, step=1, label="GIF FPS")
            gif_max_frames = gr.Slider(12, 120, value=40, step=4, label="GIF max frames (cap)")

        train_btn = gr.Button("Train from PNG pixels + generate visuals", variant="primary")
        train_report = gr.Markdown("")
        with gr.Row():
            train_img = gr.Image(label="Loss + perplexity", type="filepath")
            compare_img = gr.Image(label="BEFORE vs AFTER rollout", type="filepath")
            gif_out = gr.Image(label="Rollout GIF (optional)", type="filepath")
        metrics_json = gr.Code(label="Metrics (JSON)", language="json")

        # Input order mirrors train_from_card's parameter list; the five
        # outputs receive its five return values in order.
        train_btn.click(
            train_from_card,
            inputs=[train_steps, batch_size, block_size, lr, log_every, temperature, rollout_steps,
                    make_gif, gif_stride, gif_fps, gif_max_frames],
            outputs=[train_report, train_img, compare_img, gif_out, metrics_json]
        )
1095
+
1096
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # Disable SSR for stability / fewer asyncio warnings in Spaces
    demo.launch(ssr_mode=False)