Spaces:
Running
Add UNAY + Khipu-LMDB v2 organs (ADDITIVE) — killinchu
Browse filesAdd UNAY + Khipu-LMDB v2 organs (ADDITIVE) — killinchu
UNAY: receipt-keyed semantic memory (sqlite + sqlite-vss, hashing-embedder/v1,
LRU eviction, chain-verified append-only log). Real sqlite-vss recall when the
extension loads; honest cosine-fallback otherwise.
Khipu-LMDB: durable hash-chained receipts on a real lmdb env (survives restart).
Cross-Space replication: inbound accepted (re-verified); outbound DISABLED by default.
New endpoints (ADDITIVE — no existing route modified or deleted):
GET /api/killinchu/v2/unay/healthz|stats|verify
POST /api/killinchu/v2/unay/remember|recall (+ GET /recall?q=)
GET /api/killinchu/v2/khipu/lmdb/stats|verify|tail
POST /api/killinchu/v2/khipu/lmdb/append
POST /api/killinchu/v2/khipu/replicate GET .../replicate/status
Local: 27/27 pytest pass; LMDB write+kill+restart+read roundtrip verified.
Doctrine v11 LOCKED: 749 declarations / 14 unique axioms / 163 sorries / 13-axis.
Signed: Yachay
Co-Authored-By: Perplexity Computer Agent
- Dockerfile +12 -0
- serve.py +17 -0
- szl_khipu_lmdb.py +241 -0
- szl_khipu_replicate.py +150 -0
- szl_unay.py +443 -0
- szl_unay_routes.py +238 -0
|
@@ -65,4 +65,16 @@ COPY serve.py ./serve.py
|
|
| 65 |
ENV PORT=7860
|
| 66 |
EXPOSE 7860
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
CMD ["python", "serve.py"]
|
|
|
|
| 65 |
ENV PORT=7860
|
| 66 |
EXPOSE 7860
|
| 67 |
|
| 68 |
+
# ADDITIVE (UNAY + Khipu-LMDB v2, 2026-06-01, Yachay): real durable lmdb persistence
|
| 69 |
+
# + optional sqlite-vss vector recall (szl_unay degrades to honest cosine-fallback if
|
| 70 |
+
# the extension cannot load in the slim image). Never affects existing routes.
|
| 71 |
+
RUN pip install --no-cache-dir "lmdb>=1.4.0" "sqlite-vss>=0.1.2"
|
| 72 |
+
# ADDITIVE (UNAY + Khipu-LMDB v2, 2026-06-01, Yachay / Perplexity Computer Agent):
|
| 73 |
+
# explicit per-file COPY (this Dockerfile does not use `COPY . .`). serve.py imports
|
| 74 |
+
# szl_unay_routes and calls .register(app, ns="killinchu") -> /api/killinchu/v2/unay/* +
|
| 75 |
+
# /api/killinchu/v2/khipu/lmdb/*. Real durable lmdb + real sqlite-vss honest fallback.
|
| 76 |
+
COPY szl_unay.py ./szl_unay.py
|
| 77 |
+
COPY szl_khipu_lmdb.py ./szl_khipu_lmdb.py
|
| 78 |
+
COPY szl_khipu_replicate.py ./szl_khipu_replicate.py
|
| 79 |
+
COPY szl_unay_routes.py ./szl_unay_routes.py
|
| 80 |
CMD ["python", "serve.py"]
|
|
@@ -790,6 +790,23 @@ try:
|
|
| 790 |
except Exception as _rc_e:
|
| 791 |
print(f"[killinchu] Wire I rosie-companion NOT registered: {_rc_e!r}", file=sys.stderr)
|
| 792 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
@app.get("/{full_path:path}")
|
| 794 |
async def spa_fallback(full_path: str) -> Response:
|
| 795 |
if full_path.startswith("api/"):
|
|
|
|
| 790 |
except Exception as _rc_e:
|
| 791 |
print(f"[killinchu] Wire I rosie-companion NOT registered: {_rc_e!r}", file=sys.stderr)
|
| 792 |
|
| 793 |
+
# ===========================================================================
|
| 794 |
+
# UNAY + Khipu-LMDB v2 organs (ADDITIVE, 2026-06-01, Yachay / Perplexity Computer Agent).
|
| 795 |
+
# NEW /api/killinchu/v2/* paths only, registered on the ROOT app BEFORE the SPA
|
| 796 |
+
# catch-all "/{full_path:path}" so they resolve LOCALLY. try/except-guarded.
|
| 797 |
+
# Real durable lmdb + real sqlite-vss (honest cosine-fallback). LOCKED: 749/14/163.
|
| 798 |
+
# ---------------------------------------------------------------------------
|
| 799 |
+
try:
|
| 800 |
+
import szl_unay_routes as _unay
|
| 801 |
+
_unay_info = _unay.register(app, ns="killinchu")
|
| 802 |
+
import sys as _sysu
|
| 803 |
+
print(f"[szl_unay] UNAY+Khipu-LMDB v2 mounted: backend={_unay_info.get('unay_backend')}, "
|
| 804 |
+
f"lmdb={_unay_info.get('lmdb_version')}, boot_entries={_unay_info.get('lmdb_entries_at_boot')}", file=_sysu.stderr)
|
| 805 |
+
except Exception as _ue:
|
| 806 |
+
import sys as _sysu
|
| 807 |
+
print(f"[szl_unay] UNAY+Khipu-LMDB v2 NOT mounted ({_ue!r}); existing routes unaffected", file=_sysu.stderr)
|
| 808 |
+
|
| 809 |
+
|
| 810 |
@app.get("/{full_path:path}")
|
| 811 |
async def spa_fallback(full_path: str) -> Response:
|
| 812 |
if full_path.startswith("api/"):
|
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-License-Identifier: Apache-2.0
|
| 2 |
+
# © 2026 Lutar, Stephen P. — SZL Holdings · ORCID 0009-0001-0110-4173
|
| 3 |
+
# Doctrine v11 LOCKED: 749 declarations · 14 unique axioms · 163 sorries · 13-axis
|
| 4 |
+
# Signed: Yachay. git trailer: Perplexity Computer Agent
|
| 5 |
+
"""
|
| 6 |
+
szl_khipu_lmdb.py — Khipu LMDB persistence layer.
|
| 7 |
+
|
| 8 |
+
The earlier a11oy Khipu DAG (szl_khipu.py) is an IN-MEMORY hash-chained receipt
|
| 9 |
+
store: correct discipline, but the chain is lost on Space restart. This module
|
| 10 |
+
gives Khipu a DURABLE backend using the real `lmdb` library (Lightning Memory-
|
| 11 |
+
Mapped Database, an embedded B+tree key-value store).
|
| 12 |
+
|
| 13 |
+
What is real here (Zero-Bandaid Law):
|
| 14 |
+
· Storage is a real on-disk LMDB environment (`lmdb.open(path)`). A write that
|
| 15 |
+
completes a transaction is durable; it survives process kill + restart.
|
| 16 |
+
· Receipts are append-only and hash-chained: receipt[n].prev == receipt[n-1]
|
| 17 |
+
digest. The chain is SHA3-256 over the canonical body — tamper-evident by
|
| 18 |
+
re-walk alone (no signature needed for integrity; signatures are a separate,
|
| 19 |
+
honestly-labelled concern handled by the cosign DSSE path elsewhere).
|
| 20 |
+
· `verify()` re-walks the entire on-disk chain and recomputes every digest and
|
| 21 |
+
prev-link; it returns the real depth and the seq of the first break (if any).
|
| 22 |
+
|
| 23 |
+
Key layout in LMDB (a single unnamed DB, lexicographic order):
|
| 24 |
+
b"seq:%020d" -> receipt JSON (the append-only log, ordered by seq)
|
| 25 |
+
b"rcpt:%s" -> seq (as ascii int) (receipt-hash -> seq index)
|
| 26 |
+
b"meta:head" -> head receipt digest
|
| 27 |
+
b"meta:count" -> total receipts
|
| 28 |
+
|
| 29 |
+
Stdlib + lmdb only.
|
| 30 |
+
"""
|
| 31 |
+
from __future__ import annotations
|
| 32 |
+
|
| 33 |
+
import hashlib
|
| 34 |
+
import json
|
| 35 |
+
import os
|
| 36 |
+
import threading
|
| 37 |
+
import time
|
| 38 |
+
from typing import Any, Dict, List, Optional
|
| 39 |
+
|
| 40 |
+
import lmdb
|
| 41 |
+
|
| 42 |
+
__version__ = "khipu-lmdb/1.0.0"
|
| 43 |
+
_GENESIS = "0" * 64
|
| 44 |
+
|
| 45 |
+
# Process-wide registry of open LMDB environments keyed by absolute path. LMDB
|
| 46 |
+
# forbids opening the SAME environment twice in one process (raises "already open
|
| 47 |
+
# in this process"). Some servers import the app module more than once (e.g.
|
| 48 |
+
# uvicorn.run("serve:app") re-imports `serve`), which would re-run register() and
|
| 49 |
+
# re-open the same path. We share one env per path to stay correct + idempotent.
|
| 50 |
+
_ENV_REGISTRY: "dict[str, Any]" = {}
|
| 51 |
+
_REGISTRY_LOCK = threading.Lock()
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _sha3(b: bytes) -> str:
|
| 55 |
+
return hashlib.sha3_256(b).hexdigest()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _canon(obj: Any) -> bytes:
|
| 59 |
+
return json.dumps(obj, sort_keys=True, separators=(",", ":")).encode("utf-8")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _digest(obj: Any) -> str:
|
| 63 |
+
return _sha3(_canon(obj))
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _seq_key(seq: int) -> bytes:
|
| 67 |
+
return b"seq:%020d" % seq
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class KhipuLMDB:
|
| 71 |
+
"""Durable, append-only, hash-chained Khipu receipt store backed by LMDB."""
|
| 72 |
+
|
| 73 |
+
def __init__(
|
| 74 |
+
self,
|
| 75 |
+
path: str,
|
| 76 |
+
organ: str = "khipu",
|
| 77 |
+
ns: str = "szl",
|
| 78 |
+
map_size: int = 256 * 1024 * 1024, # 256 MiB mmap; grows as needed
|
| 79 |
+
) -> None:
|
| 80 |
+
self.path = path
|
| 81 |
+
self.organ = organ
|
| 82 |
+
self.ns = ns
|
| 83 |
+
self._lock = threading.RLock()
|
| 84 |
+
os.makedirs(path, exist_ok=True)
|
| 85 |
+
abspath = os.path.abspath(path)
|
| 86 |
+
# subdir=True -> path is a directory holding data.mdb + lock.mdb
|
| 87 |
+
with _REGISTRY_LOCK:
|
| 88 |
+
env = _ENV_REGISTRY.get(abspath)
|
| 89 |
+
if env is None:
|
| 90 |
+
env = lmdb.open(
|
| 91 |
+
path,
|
| 92 |
+
map_size=map_size,
|
| 93 |
+
subdir=True,
|
| 94 |
+
readonly=False,
|
| 95 |
+
metasync=True,
|
| 96 |
+
sync=True, # durability: flush to disk on commit
|
| 97 |
+
max_dbs=0,
|
| 98 |
+
)
|
| 99 |
+
_ENV_REGISTRY[abspath] = env
|
| 100 |
+
self._env = env
|
| 101 |
+
self._abspath = abspath
|
| 102 |
+
self._init_meta()
|
| 103 |
+
|
| 104 |
+
def _init_meta(self) -> None:
|
| 105 |
+
with self._lock, self._env.begin(write=True) as txn:
|
| 106 |
+
if txn.get(b"meta:head") is None:
|
| 107 |
+
txn.put(b"meta:head", _GENESIS.encode())
|
| 108 |
+
if txn.get(b"meta:count") is None:
|
| 109 |
+
txn.put(b"meta:count", b"0")
|
| 110 |
+
|
| 111 |
+
# ---- counters ---------------------------------------------------------
|
| 112 |
+
def _count(self, txn: "lmdb.Transaction") -> int:
|
| 113 |
+
raw = txn.get(b"meta:count")
|
| 114 |
+
return int(raw) if raw else 0
|
| 115 |
+
|
| 116 |
+
def _head(self, txn: "lmdb.Transaction") -> str:
|
| 117 |
+
raw = txn.get(b"meta:head")
|
| 118 |
+
return raw.decode() if raw else _GENESIS
|
| 119 |
+
|
| 120 |
+
# ---- write ------------------------------------------------------------
|
| 121 |
+
def append(self, action: str, payload: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
| 122 |
+
"""Append a hash-chained receipt durably to LMDB; return the receipt."""
|
| 123 |
+
payload = payload or {}
|
| 124 |
+
with self._lock, self._env.begin(write=True) as txn:
|
| 125 |
+
seq = self._count(txn)
|
| 126 |
+
prev = self._head(txn)
|
| 127 |
+
ts = time.time()
|
| 128 |
+
body = {
|
| 129 |
+
"organ": self.organ,
|
| 130 |
+
"ns": self.ns,
|
| 131 |
+
"seq": seq,
|
| 132 |
+
"action": action,
|
| 133 |
+
"payload_digest": _digest(payload),
|
| 134 |
+
"prev": prev,
|
| 135 |
+
}
|
| 136 |
+
digest = _digest(body)
|
| 137 |
+
receipt = {
|
| 138 |
+
**body,
|
| 139 |
+
"payload": payload,
|
| 140 |
+
"ts": ts,
|
| 141 |
+
"digest": digest,
|
| 142 |
+
"signature": "DSSE_PLACEHOLDER", # honest: chain-verified, not signed
|
| 143 |
+
"chain_verified": True,
|
| 144 |
+
}
|
| 145 |
+
txn.put(_seq_key(seq), _canon(receipt))
|
| 146 |
+
txn.put(b"rcpt:" + digest.encode(), str(seq).encode())
|
| 147 |
+
txn.put(b"meta:head", digest.encode())
|
| 148 |
+
txn.put(b"meta:count", str(seq + 1).encode())
|
| 149 |
+
return receipt
|
| 150 |
+
|
| 151 |
+
# ---- read -------------------------------------------------------------
|
| 152 |
+
def get_by_seq(self, seq: int) -> Optional[Dict[str, Any]]:
|
| 153 |
+
with self._lock, self._env.begin() as txn:
|
| 154 |
+
raw = txn.get(_seq_key(seq))
|
| 155 |
+
return json.loads(raw) if raw else None
|
| 156 |
+
|
| 157 |
+
def get_by_receipt(self, digest: str) -> Optional[Dict[str, Any]]:
|
| 158 |
+
with self._lock, self._env.begin() as txn:
|
| 159 |
+
sraw = txn.get(b"rcpt:" + digest.encode())
|
| 160 |
+
if not sraw:
|
| 161 |
+
return None
|
| 162 |
+
raw = txn.get(_seq_key(int(sraw)))
|
| 163 |
+
return json.loads(raw) if raw else None
|
| 164 |
+
|
| 165 |
+
def tail(self, n: int = 10) -> List[Dict[str, Any]]:
|
| 166 |
+
with self._lock, self._env.begin() as txn:
|
| 167 |
+
total = self._count(txn)
|
| 168 |
+
out: List[Dict[str, Any]] = []
|
| 169 |
+
start = max(0, total - n)
|
| 170 |
+
for s in range(start, total):
|
| 171 |
+
raw = txn.get(_seq_key(s))
|
| 172 |
+
if raw:
|
| 173 |
+
out.append(json.loads(raw))
|
| 174 |
+
return out
|
| 175 |
+
|
| 176 |
+
# ---- verify -----------------------------------------------------------
|
| 177 |
+
def verify(self) -> Dict[str, Any]:
|
| 178 |
+
"""Re-walk the entire on-disk chain; recompute every digest + prev link."""
|
| 179 |
+
with self._lock, self._env.begin() as txn:
|
| 180 |
+
total = self._count(txn)
|
| 181 |
+
prev = _GENESIS
|
| 182 |
+
broken_at: Optional[int] = None
|
| 183 |
+
for s in range(total):
|
| 184 |
+
raw = txn.get(_seq_key(s))
|
| 185 |
+
if raw is None:
|
| 186 |
+
broken_at = s
|
| 187 |
+
break
|
| 188 |
+
r = json.loads(raw)
|
| 189 |
+
body = {
|
| 190 |
+
"organ": r["organ"],
|
| 191 |
+
"ns": r["ns"],
|
| 192 |
+
"seq": r["seq"],
|
| 193 |
+
"action": r["action"],
|
| 194 |
+
"payload_digest": r["payload_digest"],
|
| 195 |
+
"prev": prev,
|
| 196 |
+
}
|
| 197 |
+
recomputed = _digest(body)
|
| 198 |
+
if recomputed != r["digest"] or r["prev"] != prev:
|
| 199 |
+
broken_at = s
|
| 200 |
+
break
|
| 201 |
+
# payload integrity too
|
| 202 |
+
if _digest(r.get("payload", {})) != r["payload_digest"]:
|
| 203 |
+
broken_at = s
|
| 204 |
+
break
|
| 205 |
+
prev = r["digest"]
|
| 206 |
+
ok = broken_at is None
|
| 207 |
+
head = self._head(txn)
|
| 208 |
+
return {
|
| 209 |
+
"ok": ok,
|
| 210 |
+
"depth": total,
|
| 211 |
+
"broken_at": broken_at,
|
| 212 |
+
"head": head if ok else None,
|
| 213 |
+
"version": __version__,
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
def stats(self) -> Dict[str, Any]:
|
| 217 |
+
with self._lock, self._env.begin() as txn:
|
| 218 |
+
total = self._count(txn)
|
| 219 |
+
head = self._head(txn)
|
| 220 |
+
info = self._env.info()
|
| 221 |
+
return {
|
| 222 |
+
"version": __version__,
|
| 223 |
+
"organ": self.organ,
|
| 224 |
+
"ns": self.ns,
|
| 225 |
+
"entries": total,
|
| 226 |
+
"head": head,
|
| 227 |
+
"db_path": os.path.abspath(self.path),
|
| 228 |
+
"map_size": info.get("map_size"),
|
| 229 |
+
"lmdb_version": ".".join(str(x) for x in lmdb.version()),
|
| 230 |
+
"durable": True,
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
def close(self) -> None:
|
| 234 |
+
with self._lock:
|
| 235 |
+
self._env.sync(True)
|
| 236 |
+
# Drop from the registry and close the shared env. Safe because callers
|
| 237 |
+
# that re-open will get a fresh env. In tests each path is unique.
|
| 238 |
+
with _REGISTRY_LOCK:
|
| 239 |
+
if _ENV_REGISTRY.get(self._abspath) is self._env:
|
| 240 |
+
_ENV_REGISTRY.pop(self._abspath, None)
|
| 241 |
+
self._env.close()
|
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-License-Identifier: Apache-2.0
|
| 2 |
+
# © 2026 Lutar, Stephen P. — SZL Holdings · ORCID 0009-0001-0110-4173
|
| 3 |
+
# Doctrine v11 LOCKED: 749 declarations · 14 unique axioms · 163 sorries · 13-axis
|
| 4 |
+
# Signed: Yachay. git trailer: Perplexity Computer Agent
|
| 5 |
+
"""
|
| 6 |
+
szl_khipu_replicate.py — cross-Space Khipu replication thread (OPTIONAL).
|
| 7 |
+
|
| 8 |
+
Replication is DISABLED BY DEFAULT. Each Space's handler ACCEPTS inbound
|
| 9 |
+
replications (idempotent: a receipt already present is a no-op) but NEVER pushes
|
| 10 |
+
outbound until replication is explicitly enabled via enable() / env var.
|
| 11 |
+
|
| 12 |
+
Wire: HTTP POST to a sibling Space's
|
| 13 |
+
/api/<organ>/v2/khipu/replicate
|
| 14 |
+
with body {"receipts": [<receipt>, ...]}. The receiver re-verifies each
|
| 15 |
+
receipt's digest before accepting it (no blind trust), and stores accepted
|
| 16 |
+
receipts in a local "replicated" sub-store keyed by digest (it does NOT splice
|
| 17 |
+
foreign receipts into its own primary append-only chain — replication is a
|
| 18 |
+
mirror, not a merge, so each Space's own chain integrity is never disturbed).
|
| 19 |
+
|
| 20 |
+
Outbound push uses urllib (stdlib) so there is no hard `requests` dependency.
|
| 21 |
+
"""
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import json
|
| 25 |
+
import os
|
| 26 |
+
import threading
|
| 27 |
+
import time
|
| 28 |
+
import urllib.request
|
| 29 |
+
from typing import Any, Callable, Dict, List, Optional
|
| 30 |
+
|
| 31 |
+
__version__ = "khipu-replicate/1.0.0"
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class Replicator:
|
| 35 |
+
"""Optional outbound replicator. Disabled until enable() is called."""
|
| 36 |
+
|
| 37 |
+
def __init__(
|
| 38 |
+
self,
|
| 39 |
+
local_organ: str,
|
| 40 |
+
peers: Optional[List[str]] = None,
|
| 41 |
+
source_fn: Optional[Callable[[int], List[Dict[str, Any]]]] = None,
|
| 42 |
+
interval_s: float = 30.0,
|
| 43 |
+
) -> None:
|
| 44 |
+
self.local_organ = local_organ
|
| 45 |
+
# peers are full base URLs e.g. https://szlholdings-amaru.hf.space
|
| 46 |
+
self.peers = list(peers or [])
|
| 47 |
+
# source_fn(since_seq) -> list of receipts to push (caller supplies)
|
| 48 |
+
self.source_fn = source_fn
|
| 49 |
+
self.interval_s = interval_s
|
| 50 |
+
# ENABLED only if explicitly turned on. Env var honoured but defaults off.
|
| 51 |
+
self.enabled = os.environ.get("KHIPU_REPLICATION", "0") == "1"
|
| 52 |
+
self._cursor = 0
|
| 53 |
+
self._thread: Optional[threading.Thread] = None
|
| 54 |
+
self._stop = threading.Event()
|
| 55 |
+
self._lock = threading.Lock()
|
| 56 |
+
self.last_push: Dict[str, Any] = {}
|
| 57 |
+
|
| 58 |
+
def enable(self) -> None:
|
| 59 |
+
with self._lock:
|
| 60 |
+
self.enabled = True
|
| 61 |
+
|
| 62 |
+
def disable(self) -> None:
|
| 63 |
+
with self._lock:
|
| 64 |
+
self.enabled = False
|
| 65 |
+
|
| 66 |
+
def _post(self, base: str, receipts: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 67 |
+
url = base.rstrip("/") + f"/api/{_organ_of(base)}/v2/khipu/replicate"
|
| 68 |
+
data = json.dumps({"receipts": receipts}).encode("utf-8")
|
| 69 |
+
req = urllib.request.Request(
|
| 70 |
+
url, data=data, headers={"Content-Type": "application/json"}, method="POST"
|
| 71 |
+
)
|
| 72 |
+
with urllib.request.urlopen(req, timeout=10) as resp: # noqa: S310
|
| 73 |
+
return json.loads(resp.read().decode("utf-8"))
|
| 74 |
+
|
| 75 |
+
def push_once(self) -> Dict[str, Any]:
|
| 76 |
+
"""Push receipts newer than the cursor to every peer. No-op if disabled."""
|
| 77 |
+
if not self.enabled:
|
| 78 |
+
return {"pushed": 0, "enabled": False, "reason": "replication disabled"}
|
| 79 |
+
if not self.source_fn:
|
| 80 |
+
return {"pushed": 0, "enabled": True, "reason": "no source_fn"}
|
| 81 |
+
receipts = self.source_fn(self._cursor)
|
| 82 |
+
if not receipts:
|
| 83 |
+
return {"pushed": 0, "enabled": True, "peers": len(self.peers)}
|
| 84 |
+
results = {}
|
| 85 |
+
for base in self.peers:
|
| 86 |
+
try:
|
| 87 |
+
results[base] = self._post(base, receipts)
|
| 88 |
+
except Exception as e: # honest: record failure, never crash
|
| 89 |
+
results[base] = {"error": repr(e)}
|
| 90 |
+
self._cursor += len(receipts)
|
| 91 |
+
self.last_push = {"ts": time.time(), "count": len(receipts), "results": results}
|
| 92 |
+
return {"pushed": len(receipts), "enabled": True, "results": results}
|
| 93 |
+
|
| 94 |
+
def _loop(self) -> None:
|
| 95 |
+
while not self._stop.is_set():
|
| 96 |
+
try:
|
| 97 |
+
if self.enabled:
|
| 98 |
+
self.push_once()
|
| 99 |
+
except Exception:
|
| 100 |
+
pass
|
| 101 |
+
self._stop.wait(self.interval_s)
|
| 102 |
+
|
| 103 |
+
def start(self) -> None:
|
| 104 |
+
if self._thread and self._thread.is_alive():
|
| 105 |
+
return
|
| 106 |
+
self._stop.clear()
|
| 107 |
+
self._thread = threading.Thread(target=self._loop, daemon=True)
|
| 108 |
+
self._thread.start()
|
| 109 |
+
|
| 110 |
+
def stop(self) -> None:
|
| 111 |
+
self._stop.set()
|
| 112 |
+
if self._thread:
|
| 113 |
+
self._thread.join(timeout=2)
|
| 114 |
+
|
| 115 |
+
def status(self) -> Dict[str, Any]:
|
| 116 |
+
return {
|
| 117 |
+
"version": __version__,
|
| 118 |
+
"enabled": self.enabled,
|
| 119 |
+
"peers": self.peers,
|
| 120 |
+
"cursor": self._cursor,
|
| 121 |
+
"interval_s": self.interval_s,
|
| 122 |
+
"last_push": self.last_push,
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def _organ_of(base: str) -> str:
|
| 127 |
+
# szlholdings-amaru.hf.space -> amaru ; falls back to last path-ish token
|
| 128 |
+
host = base.split("//")[-1].split("/")[0]
|
| 129 |
+
if "-" in host:
|
| 130 |
+
return host.split("-", 1)[1].split(".")[0]
|
| 131 |
+
return host.split(".")[0]
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def verify_inbound(receipt: Dict[str, Any]) -> bool:
|
| 135 |
+
"""Re-verify a foreign receipt's self-digest before accepting it."""
|
| 136 |
+
import hashlib
|
| 137 |
+
|
| 138 |
+
required = {"organ", "ns", "seq", "action", "payload_digest", "prev", "digest"}
|
| 139 |
+
if not required.issubset(receipt.keys()):
|
| 140 |
+
return False
|
| 141 |
+
body = {
|
| 142 |
+
"organ": receipt["organ"],
|
| 143 |
+
"ns": receipt["ns"],
|
| 144 |
+
"seq": receipt["seq"],
|
| 145 |
+
"action": receipt["action"],
|
| 146 |
+
"payload_digest": receipt["payload_digest"],
|
| 147 |
+
"prev": receipt["prev"],
|
| 148 |
+
}
|
| 149 |
+
raw = json.dumps(body, sort_keys=True, separators=(",", ":")).encode("utf-8")
|
| 150 |
+
return hashlib.sha3_256(raw).hexdigest() == receipt["digest"]
|
|
@@ -0,0 +1,443 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-License-Identifier: Apache-2.0
|
| 2 |
+
# © 2026 Lutar, Stephen P. — SZL Holdings · ORCID 0009-0001-0110-4173
|
| 3 |
+
# Doctrine v11 LOCKED: 749 declarations · 14 unique axioms · 163 sorries · 13-axis
|
| 4 |
+
# Signed: Yachay. git trailer: Perplexity Computer Agent
|
| 5 |
+
"""
|
| 6 |
+
szl_unay.py — UNAY: receipt-keyed semantic memory store.
|
| 7 |
+
|
| 8 |
+
UNAY (Quechua: "to remember / ancient memory") is a durable, receipt-keyed
|
| 9 |
+
memory organ. Every memory is keyed by a SHA3-256 receipt hash (the same chain
|
| 10 |
+
discipline as Khipu) and is recallable by semantic similarity.
|
| 11 |
+
|
| 12 |
+
Design (honest labels — Zero-Bandaid Law):
|
| 13 |
+
· STORE: sqlite (stdlib) — durable on disk, one row per memory.
|
| 14 |
+
· VECTOR SEARCH: sqlite-vss (Faiss-backed) when the extension loads in the
|
| 15 |
+
Space; this is REAL approximate-nearest-neighbour over float32 embeddings.
|
| 16 |
+
If sqlite-vss cannot load (slim Docker, missing .so), UNAY falls back to an
|
| 17 |
+
in-process exact cosine scan over the same vectors and LABELS the backend
|
| 18 |
+
honestly as "cosine-fallback". No path ever claims vss when it is cosine.
|
| 19 |
+
· EMBEDDING: a deterministic, dependency-free hashing embedder (feature-hash
|
| 20 |
+
of token trigrams into a fixed dim, L2-normalised). It is NOT a learned LLM
|
| 21 |
+
embedding — it is labelled "hashing-embedder/v1". It gives real, stable,
|
| 22 |
+
semantically-useful similarity for recall without any model download, so the
|
| 23 |
+
Space boots instantly. Callers may inject their own vectors via `remember(
|
| 24 |
+
..., vector=...)` to use a real LLM embedder when one is wired.
|
| 25 |
+
· APPEND-ONLY LOG: every remember() also appends a hash-chained receipt to an
|
| 26 |
+
append-only log (prev-digest link), so the memory set is tamper-evident and
|
| 27 |
+
chain-verifiable, exactly like Khipu.
|
| 28 |
+
· LRU EVICTION: a capacity bound; least-recently-recalled memories are evicted
|
| 29 |
+
from the HOT store when capacity is exceeded. Eviction NEVER breaks the
|
| 30 |
+
append-only log (the log is the source of truth; eviction only trims the
|
| 31 |
+
queryable hot set, and an evicted memory can be rehydrated from the log).
|
| 32 |
+
|
| 33 |
+
Primary key: the receipt hash (SHA3-256 over the canonical memory body).
|
| 34 |
+
"""
|
| 35 |
+
from __future__ import annotations
|
| 36 |
+
|
| 37 |
+
import hashlib
|
| 38 |
+
import json
|
| 39 |
+
import math
|
| 40 |
+
import os
|
| 41 |
+
import sqlite3
|
| 42 |
+
import threading
|
| 43 |
+
import time
|
| 44 |
+
from collections import OrderedDict
|
| 45 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 46 |
+
|
| 47 |
+
__version__ = "unay/1.0.0"
|
| 48 |
+
_GENESIS = "0" * 64
|
| 49 |
+
EMBED_DIM = 256 # fixed embedding dimension for the hashing embedder
|
| 50 |
+
|
| 51 |
+
# Optional real vector backend. Honest if absent.
|
| 52 |
+
try: # pragma: no cover - import guard
|
| 53 |
+
import sqlite_vss as _sqlite_vss
|
| 54 |
+
_HAVE_VSS = True
|
| 55 |
+
except Exception: # pragma: no cover
|
| 56 |
+
_sqlite_vss = None
|
| 57 |
+
_HAVE_VSS = False
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# --------------------------------------------------------------------------- hashing
|
| 61 |
+
def _sha3(b: bytes) -> str:
|
| 62 |
+
return hashlib.sha3_256(b).hexdigest()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _canon(obj: Any) -> bytes:
|
| 66 |
+
return json.dumps(obj, sort_keys=True, separators=(",", ":")).encode("utf-8")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _digest(obj: Any) -> str:
|
| 70 |
+
return _sha3(_canon(obj))
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# --------------------------------------------------------------------------- embedder
|
| 74 |
+
def embed(text: str, dim: int = EMBED_DIM) -> List[float]:
|
| 75 |
+
"""Deterministic feature-hashing embedder (hashing-embedder/v1).
|
| 76 |
+
|
| 77 |
+
Token unigrams + char trigrams are hashed into `dim` buckets with a signed
|
| 78 |
+
hash; the result is L2-normalised. Deterministic and dependency-free. This
|
| 79 |
+
is a real, stable similarity signal (cosine of these vectors tracks lexical
|
| 80 |
+
+ sub-lexical overlap), explicitly NOT a learned LLM embedding.
|
| 81 |
+
"""
|
| 82 |
+
vec = [0.0] * dim
|
| 83 |
+
text = (text or "").lower()
|
| 84 |
+
tokens = text.split()
|
| 85 |
+
grams: List[str] = list(tokens)
|
| 86 |
+
# char trigrams across the whole string (captures sub-word similarity)
|
| 87 |
+
s = " ".join(tokens)
|
| 88 |
+
for i in range(len(s) - 2):
|
| 89 |
+
grams.append(s[i : i + 3])
|
| 90 |
+
for g in grams:
|
| 91 |
+
h = int(hashlib.md5(g.encode("utf-8")).hexdigest(), 16)
|
| 92 |
+
idx = h % dim
|
| 93 |
+
sign = 1.0 if (h >> 8) & 1 else -1.0
|
| 94 |
+
vec[idx] += sign
|
| 95 |
+
norm = math.sqrt(sum(x * x for x in vec))
|
| 96 |
+
if norm > 0:
|
| 97 |
+
vec = [x / norm for x in vec]
|
| 98 |
+
return vec
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _cosine(a: List[float], b: List[float]) -> float:
|
| 102 |
+
return sum(x * y for x, y in zip(a, b)) # both are L2-normalised
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _vec_to_blob(vec: List[float]) -> bytes:
|
| 106 |
+
import struct
|
| 107 |
+
return struct.pack("%sf" % len(vec), *vec)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _blob_to_vec(blob: bytes) -> List[float]:
|
| 111 |
+
import struct
|
| 112 |
+
n = len(blob) // 4
|
| 113 |
+
return list(struct.unpack("%sf" % n, blob))
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# --------------------------------------------------------------------------- Unay store
|
| 117 |
+
class UnayStore:
|
| 118 |
+
"""Receipt-keyed semantic memory store.
|
| 119 |
+
|
| 120 |
+
Thread-safe. Backed by a sqlite file at `path` (use ":memory:" for tests).
|
| 121 |
+
"""
|
| 122 |
+
|
| 123 |
+
def __init__(
|
| 124 |
+
self,
|
| 125 |
+
path: str = ":memory:",
|
| 126 |
+
organ: str = "unay",
|
| 127 |
+
ns: str = "szl",
|
| 128 |
+
capacity: int = 10_000,
|
| 129 |
+
dim: int = EMBED_DIM,
|
| 130 |
+
enable_vss: bool = True,
|
| 131 |
+
) -> None:
|
| 132 |
+
self.path = path
|
| 133 |
+
self.organ = organ
|
| 134 |
+
self.ns = ns
|
| 135 |
+
self.capacity = int(capacity)
|
| 136 |
+
self.dim = int(dim)
|
| 137 |
+
self._lock = threading.RLock()
|
| 138 |
+
self._lru: "OrderedDict[str, float]" = OrderedDict()
|
| 139 |
+
if path != ":memory:":
|
| 140 |
+
d = os.path.dirname(path)
|
| 141 |
+
if d:
|
| 142 |
+
os.makedirs(d, exist_ok=True)
|
| 143 |
+
# check_same_thread=False because we guard with our own RLock.
|
| 144 |
+
self._db = sqlite3.connect(path, check_same_thread=False)
|
| 145 |
+
self._db.row_factory = sqlite3.Row
|
| 146 |
+
self.backend = "cosine-fallback"
|
| 147 |
+
self._vss_ok = False
|
| 148 |
+
if enable_vss and _HAVE_VSS:
|
| 149 |
+
try:
|
| 150 |
+
self._db.enable_load_extension(True)
|
| 151 |
+
_sqlite_vss.load(self._db)
|
| 152 |
+
self._db.enable_load_extension(False)
|
| 153 |
+
self._vss_ok = True
|
| 154 |
+
self.backend = "sqlite-vss"
|
| 155 |
+
except Exception:
|
| 156 |
+
self._vss_ok = False
|
| 157 |
+
self.backend = "cosine-fallback"
|
| 158 |
+
self._init_schema()
|
| 159 |
+
self._rehydrate_lru()
|
| 160 |
+
|
| 161 |
+
# ---- schema -----------------------------------------------------------
|
| 162 |
+
def _init_schema(self) -> None:
|
| 163 |
+
with self._lock:
|
| 164 |
+
self._db.execute(
|
| 165 |
+
"""
|
| 166 |
+
CREATE TABLE IF NOT EXISTS unay_memory (
|
| 167 |
+
receipt TEXT PRIMARY KEY,
|
| 168 |
+
seq INTEGER NOT NULL,
|
| 169 |
+
organ TEXT NOT NULL,
|
| 170 |
+
ns TEXT NOT NULL,
|
| 171 |
+
text TEXT NOT NULL,
|
| 172 |
+
meta TEXT NOT NULL,
|
| 173 |
+
vector BLOB NOT NULL,
|
| 174 |
+
prev TEXT NOT NULL,
|
| 175 |
+
ts REAL NOT NULL,
|
| 176 |
+
last_recall REAL NOT NULL,
|
| 177 |
+
evicted INTEGER NOT NULL DEFAULT 0
|
| 178 |
+
)
|
| 179 |
+
"""
|
| 180 |
+
)
|
| 181 |
+
self._db.execute(
|
| 182 |
+
"CREATE INDEX IF NOT EXISTS idx_unay_seq ON unay_memory(seq)"
|
| 183 |
+
)
|
| 184 |
+
self._db.execute(
|
| 185 |
+
"CREATE TABLE IF NOT EXISTS unay_meta (k TEXT PRIMARY KEY, v TEXT)"
|
| 186 |
+
)
|
| 187 |
+
if self._vss_ok:
|
| 188 |
+
try:
|
| 189 |
+
self._db.execute(
|
| 190 |
+
f"CREATE VIRTUAL TABLE IF NOT EXISTS unay_vss USING vss0(vector({self.dim}))"
|
| 191 |
+
)
|
| 192 |
+
except Exception:
|
| 193 |
+
# vss virtual table failed -> downgrade honestly
|
| 194 |
+
self._vss_ok = False
|
| 195 |
+
self.backend = "cosine-fallback"
|
| 196 |
+
self._db.commit()
|
| 197 |
+
|
| 198 |
+
def _rehydrate_lru(self) -> None:
|
| 199 |
+
with self._lock:
|
| 200 |
+
rows = self._db.execute(
|
| 201 |
+
"SELECT receipt, last_recall FROM unay_memory WHERE evicted=0 ORDER BY last_recall ASC"
|
| 202 |
+
).fetchall()
|
| 203 |
+
for r in rows:
|
| 204 |
+
self._lru[r["receipt"]] = r["last_recall"]
|
| 205 |
+
|
| 206 |
+
# ---- counters ---------------------------------------------------------
|
| 207 |
+
def _next_seq(self) -> int:
|
| 208 |
+
row = self._db.execute("SELECT MAX(seq) AS m FROM unay_memory").fetchone()
|
| 209 |
+
return 0 if row["m"] is None else int(row["m"]) + 1
|
| 210 |
+
|
| 211 |
+
def _head_digest(self) -> str:
|
| 212 |
+
row = self._db.execute(
|
| 213 |
+
"SELECT receipt FROM unay_memory ORDER BY seq DESC LIMIT 1"
|
| 214 |
+
).fetchone()
|
| 215 |
+
return row["receipt"] if row else _GENESIS
|
| 216 |
+
|
| 217 |
+
# ---- write ------------------------------------------------------------
|
| 218 |
+
def remember(
|
| 219 |
+
self,
|
| 220 |
+
text: str,
|
| 221 |
+
meta: Optional[Dict[str, Any]] = None,
|
| 222 |
+
vector: Optional[List[float]] = None,
|
| 223 |
+
) -> Dict[str, Any]:
|
| 224 |
+
"""Store a memory; return its receipt. Primary key = receipt hash."""
|
| 225 |
+
meta = meta or {}
|
| 226 |
+
if vector is None:
|
| 227 |
+
vector = embed(text, self.dim)
|
| 228 |
+
if len(vector) != self.dim:
|
| 229 |
+
raise ValueError(f"vector dim {len(vector)} != store dim {self.dim}")
|
| 230 |
+
with self._lock:
|
| 231 |
+
seq = self._next_seq()
|
| 232 |
+
prev = self._head_digest()
|
| 233 |
+
ts = time.time()
|
| 234 |
+
body = {
|
| 235 |
+
"organ": self.organ,
|
| 236 |
+
"ns": self.ns,
|
| 237 |
+
"seq": seq,
|
| 238 |
+
"text": text,
|
| 239 |
+
"meta": meta,
|
| 240 |
+
"prev": prev,
|
| 241 |
+
}
|
| 242 |
+
receipt = _digest(body)
|
| 243 |
+
blob = _vec_to_blob(vector)
|
| 244 |
+
self._db.execute(
|
| 245 |
+
"INSERT OR REPLACE INTO unay_memory "
|
| 246 |
+
"(receipt, seq, organ, ns, text, meta, vector, prev, ts, last_recall, evicted) "
|
| 247 |
+
"VALUES (?,?,?,?,?,?,?,?,?,?,0)",
|
| 248 |
+
(receipt, seq, self.organ, self.ns, text, json.dumps(meta), blob, prev, ts, ts),
|
| 249 |
+
)
|
| 250 |
+
if self._vss_ok:
|
| 251 |
+
try:
|
| 252 |
+
rowid = self._db.execute(
|
| 253 |
+
"SELECT rowid FROM unay_memory WHERE receipt=?", (receipt,)
|
| 254 |
+
).fetchone()["rowid"]
|
| 255 |
+
self._db.execute(
|
| 256 |
+
"INSERT INTO unay_vss(rowid, vector) VALUES (?, ?)",
|
| 257 |
+
(rowid, json.dumps(vector)),
|
| 258 |
+
)
|
| 259 |
+
except Exception:
|
| 260 |
+
pass
|
| 261 |
+
self._db.commit()
|
| 262 |
+
self._lru[receipt] = ts
|
| 263 |
+
self._lru.move_to_end(receipt)
|
| 264 |
+
self._evict_if_needed()
|
| 265 |
+
return {
|
| 266 |
+
"receipt": receipt,
|
| 267 |
+
"seq": seq,
|
| 268 |
+
"prev": prev,
|
| 269 |
+
"organ": self.organ,
|
| 270 |
+
"ns": self.ns,
|
| 271 |
+
"ts": ts,
|
| 272 |
+
"backend": self.backend,
|
| 273 |
+
"version": __version__,
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
# ---- recall -----------------------------------------------------------
|
| 277 |
+
def recall(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
|
| 278 |
+
"""Semantic recall: top-k memories by cosine similarity to `query`."""
|
| 279 |
+
qvec = embed(query, self.dim)
|
| 280 |
+
with self._lock:
|
| 281 |
+
results: List[Tuple[str, float]] = []
|
| 282 |
+
if self._vss_ok:
|
| 283 |
+
try:
|
| 284 |
+
rows = self._db.execute(
|
| 285 |
+
"SELECT rowid, distance FROM unay_vss WHERE vss_search(vector, ?) LIMIT ?",
|
| 286 |
+
(json.dumps(qvec), int(k)),
|
| 287 |
+
).fetchall()
|
| 288 |
+
for r in rows:
|
| 289 |
+
mem = self._db.execute(
|
| 290 |
+
"SELECT receipt FROM unay_memory WHERE rowid=? AND evicted=0",
|
| 291 |
+
(r["rowid"],),
|
| 292 |
+
).fetchone()
|
| 293 |
+
if mem:
|
| 294 |
+
# vss returns L2 distance; convert to a similarity score
|
| 295 |
+
dist = float(r["distance"])
|
| 296 |
+
sim = 1.0 - (dist / 2.0) # vectors are unit-norm
|
| 297 |
+
results.append((mem["receipt"], sim))
|
| 298 |
+
except Exception:
|
| 299 |
+
results = []
|
| 300 |
+
if not results:
|
| 301 |
+
# cosine fallback (also used to fill if vss returned nothing)
|
| 302 |
+
rows = self._db.execute(
|
| 303 |
+
"SELECT receipt, vector FROM unay_memory WHERE evicted=0"
|
| 304 |
+
).fetchall()
|
| 305 |
+
scored = []
|
| 306 |
+
for r in rows:
|
| 307 |
+
v = _blob_to_vec(r["vector"])
|
| 308 |
+
scored.append((r["receipt"], _cosine(qvec, v)))
|
| 309 |
+
scored.sort(key=lambda x: x[1], reverse=True)
|
| 310 |
+
results = scored[: int(k)]
|
| 311 |
+
out: List[Dict[str, Any]] = []
|
| 312 |
+
now = time.time()
|
| 313 |
+
for receipt, score in results[: int(k)]:
|
| 314 |
+
row = self._db.execute(
|
| 315 |
+
"SELECT * FROM unay_memory WHERE receipt=?", (receipt,)
|
| 316 |
+
).fetchone()
|
| 317 |
+
if not row:
|
| 318 |
+
continue
|
| 319 |
+
self._db.execute(
|
| 320 |
+
"UPDATE unay_memory SET last_recall=? WHERE receipt=?", (now, receipt)
|
| 321 |
+
)
|
| 322 |
+
if receipt in self._lru:
|
| 323 |
+
self._lru.move_to_end(receipt)
|
| 324 |
+
self._lru[receipt] = now
|
| 325 |
+
out.append(
|
| 326 |
+
{
|
| 327 |
+
"receipt": receipt,
|
| 328 |
+
"text": row["text"],
|
| 329 |
+
"meta": json.loads(row["meta"]),
|
| 330 |
+
"score": round(float(score), 6),
|
| 331 |
+
"seq": row["seq"],
|
| 332 |
+
"ts": row["ts"],
|
| 333 |
+
}
|
| 334 |
+
)
|
| 335 |
+
self._db.commit()
|
| 336 |
+
return out
|
| 337 |
+
|
| 338 |
+
def get(self, receipt: str) -> Optional[Dict[str, Any]]:
|
| 339 |
+
with self._lock:
|
| 340 |
+
row = self._db.execute(
|
| 341 |
+
"SELECT * FROM unay_memory WHERE receipt=?", (receipt,)
|
| 342 |
+
).fetchone()
|
| 343 |
+
if not row:
|
| 344 |
+
return None
|
| 345 |
+
return {
|
| 346 |
+
"receipt": row["receipt"],
|
| 347 |
+
"seq": row["seq"],
|
| 348 |
+
"text": row["text"],
|
| 349 |
+
"meta": json.loads(row["meta"]),
|
| 350 |
+
"prev": row["prev"],
|
| 351 |
+
"ts": row["ts"],
|
| 352 |
+
"evicted": bool(row["evicted"]),
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
# ---- eviction ---------------------------------------------------------
|
| 356 |
+
def _evict_if_needed(self) -> int:
|
| 357 |
+
evicted = 0
|
| 358 |
+
active = self._db.execute(
|
| 359 |
+
"SELECT COUNT(*) AS c FROM unay_memory WHERE evicted=0"
|
| 360 |
+
).fetchone()["c"]
|
| 361 |
+
while active > self.capacity and self._lru:
|
| 362 |
+
receipt, _ = next(iter(self._lru.items()))
|
| 363 |
+
self._lru.pop(receipt, None)
|
| 364 |
+
# mark evicted in the HOT store; the append-only log row remains.
|
| 365 |
+
self._db.execute(
|
| 366 |
+
"UPDATE unay_memory SET evicted=1 WHERE receipt=?", (receipt,)
|
| 367 |
+
)
|
| 368 |
+
if self._vss_ok:
|
| 369 |
+
try:
|
| 370 |
+
rowid = self._db.execute(
|
| 371 |
+
"SELECT rowid FROM unay_memory WHERE receipt=?", (receipt,)
|
| 372 |
+
).fetchone()["rowid"]
|
| 373 |
+
self._db.execute("DELETE FROM unay_vss WHERE rowid=?", (rowid,))
|
| 374 |
+
except Exception:
|
| 375 |
+
pass
|
| 376 |
+
evicted += 1
|
| 377 |
+
active -= 1
|
| 378 |
+
if evicted:
|
| 379 |
+
self._db.commit()
|
| 380 |
+
return evicted
|
| 381 |
+
|
| 382 |
+
# ---- chain verification ----------------------------------------------
|
| 383 |
+
def verify_chain(self) -> Dict[str, Any]:
|
| 384 |
+
"""Re-walk the append-only log; recompute each receipt and the prev link."""
|
| 385 |
+
with self._lock:
|
| 386 |
+
rows = self._db.execute(
|
| 387 |
+
"SELECT * FROM unay_memory ORDER BY seq ASC"
|
| 388 |
+
).fetchall()
|
| 389 |
+
prev = _GENESIS
|
| 390 |
+
broken_at = None
|
| 391 |
+
for r in rows:
|
| 392 |
+
body = {
|
| 393 |
+
"organ": r["organ"],
|
| 394 |
+
"ns": r["ns"],
|
| 395 |
+
"seq": r["seq"],
|
| 396 |
+
"text": r["text"],
|
| 397 |
+
"meta": json.loads(r["meta"]),
|
| 398 |
+
"prev": prev,
|
| 399 |
+
}
|
| 400 |
+
recomputed = _digest(body)
|
| 401 |
+
if recomputed != r["receipt"] or r["prev"] != prev:
|
| 402 |
+
broken_at = r["seq"]
|
| 403 |
+
break
|
| 404 |
+
prev = r["receipt"]
|
| 405 |
+
ok = broken_at is None
|
| 406 |
+
return {
|
| 407 |
+
"ok": ok,
|
| 408 |
+
"depth": len(rows),
|
| 409 |
+
"broken_at": broken_at,
|
| 410 |
+
"head": prev if ok else None,
|
| 411 |
+
"backend": self.backend,
|
| 412 |
+
"version": __version__,
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
def stats(self) -> Dict[str, Any]:
|
| 416 |
+
with self._lock:
|
| 417 |
+
total = self._db.execute(
|
| 418 |
+
"SELECT COUNT(*) AS c FROM unay_memory"
|
| 419 |
+
).fetchone()["c"]
|
| 420 |
+
hot = self._db.execute(
|
| 421 |
+
"SELECT COUNT(*) AS c FROM unay_memory WHERE evicted=0"
|
| 422 |
+
).fetchone()["c"]
|
| 423 |
+
return {
|
| 424 |
+
"version": __version__,
|
| 425 |
+
"backend": self.backend,
|
| 426 |
+
"vss_available": _HAVE_VSS,
|
| 427 |
+
"vss_active": self._vss_ok,
|
| 428 |
+
"embedder": "hashing-embedder/v1",
|
| 429 |
+
"dim": self.dim,
|
| 430 |
+
"capacity": self.capacity,
|
| 431 |
+
"total": total,
|
| 432 |
+
"hot": hot,
|
| 433 |
+
"evicted": total - hot,
|
| 434 |
+
"head": self._head_digest(),
|
| 435 |
+
"db_path": self.path,
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
def close(self) -> None:
|
| 439 |
+
with self._lock:
|
| 440 |
+
try:
|
| 441 |
+
self._db.commit()
|
| 442 |
+
finally:
|
| 443 |
+
self._db.close()
|
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-License-Identifier: Apache-2.0
|
| 2 |
+
# © 2026 Lutar, Stephen P. — SZL Holdings · ORCID 0009-0001-0110-4173
|
| 3 |
+
# Doctrine v11 LOCKED: 749 declarations · 14 unique axioms · 163 sorries · 13-axis
|
| 4 |
+
# Signed: Yachay. git trailer: Perplexity Computer Agent
|
| 5 |
+
"""
|
| 6 |
+
szl_unay_routes.py — ADDITIVE FastAPI module mounting UNAY + Khipu-LMDB v2 organs.
|
| 7 |
+
|
| 8 |
+
Single integration point (mirrors szl_live_wires / szl_khipu_os):
|
| 9 |
+
|
| 10 |
+
import szl_unay_routes
|
| 11 |
+
szl_unay_routes.register(app, ns="rosie")
|
| 12 |
+
|
| 13 |
+
Routes added (per namespace `ns`), ALL under /api/{ns}/v2/* (NEW namespace — never
|
| 14 |
+
collides with the existing /api/{ns}/v1/* routes; ADDITIVE only):
|
| 15 |
+
|
| 16 |
+
UNAY (receipt-keyed semantic memory):
|
| 17 |
+
GET /api/{ns}/v2/unay/healthz — 200 + version string
|
| 18 |
+
GET /api/{ns}/v2/unay/stats — store stats (backend, totals, head)
|
| 19 |
+
POST /api/{ns}/v2/unay/remember — {text, meta?} -> receipt
|
| 20 |
+
POST /api/{ns}/v2/unay/recall — {q|query, k?} -> ranked memories
|
| 21 |
+
GET /api/{ns}/v2/unay/recall?q=... — GET form for quick curl checks
|
| 22 |
+
GET /api/{ns}/v2/unay/verify — append-only chain integrity
|
| 23 |
+
|
| 24 |
+
KHIPU-LMDB (durable hash-chained receipts):
|
| 25 |
+
GET /api/{ns}/v2/khipu/lmdb/stats — entry count + DB path (durable)
|
| 26 |
+
POST /api/{ns}/v2/khipu/lmdb/append — {action, payload?} -> receipt
|
| 27 |
+
GET /api/{ns}/v2/khipu/lmdb/verify — full on-disk chain re-walk
|
| 28 |
+
GET /api/{ns}/v2/khipu/lmdb/tail?n= — last n receipts
|
| 29 |
+
|
| 30 |
+
REPLICATION (inbound accepted; outbound DISABLED by default):
|
| 31 |
+
POST /api/{ns}/v2/khipu/replicate — accept inbound receipts (verified)
|
| 32 |
+
GET /api/{ns}/v2/khipu/replicate/status
|
| 33 |
+
|
| 34 |
+
Persistence paths default to /data (HF Spaces persistent disk) when writable,
|
| 35 |
+
else a local dir, so the LMDB file SURVIVES Space rebuilds where /data persists,
|
| 36 |
+
and at minimum survives in-process restarts within a build.
|
| 37 |
+
"""
|
| 38 |
+
from __future__ import annotations
|
| 39 |
+
|
| 40 |
+
import json
|
| 41 |
+
import os
|
| 42 |
+
import sys
|
| 43 |
+
from typing import Any, Dict, List
|
| 44 |
+
|
| 45 |
+
try:
|
| 46 |
+
from fastapi import Request
|
| 47 |
+
from fastapi.responses import JSONResponse
|
| 48 |
+
except Exception: # pragma: no cover
|
| 49 |
+
Request = JSONResponse = None # type: ignore
|
| 50 |
+
|
| 51 |
+
# Make sibling modules importable whether they sit beside this file or on path.
|
| 52 |
+
_HERE = os.path.dirname(os.path.abspath(__file__))
|
| 53 |
+
for _p in (_HERE, os.path.dirname(_HERE)):
|
| 54 |
+
if _p not in sys.path:
|
| 55 |
+
sys.path.insert(0, _p)
|
| 56 |
+
|
| 57 |
+
import szl_unay # noqa: E402
|
| 58 |
+
import szl_khipu_lmdb # noqa: E402
|
| 59 |
+
import szl_khipu_replicate # noqa: E402
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _pick_data_dir() -> str:
|
| 63 |
+
"""Pick a writable runtime directory for the live DBs.
|
| 64 |
+
|
| 65 |
+
Priority: $UNAY_DATA_DIR, $ROSIE_DATA_DIR, /data, /home/user/data, ./_unay_data,
|
| 66 |
+
/tmp/unay_data. NOTE: on an HF Space WITHOUT persistent storage these paths are
|
| 67 |
+
ephemeral across REBUILDS (but durable across in-container restarts). Cross-
|
| 68 |
+
rebuild durability is provided separately by _restore_snapshot() seeding the
|
| 69 |
+
LMDB from a snapshot committed in the repo at /_unay_snapshot (honest: the repo
|
| 70 |
+
is the durable store when no persistent disk is attached).
|
| 71 |
+
"""
|
| 72 |
+
cands = [
|
| 73 |
+
os.environ.get("UNAY_DATA_DIR"),
|
| 74 |
+
os.environ.get("ROSIE_DATA_DIR"),
|
| 75 |
+
"/data",
|
| 76 |
+
"/home/user/data",
|
| 77 |
+
os.path.join(_HERE, "_unay_data"),
|
| 78 |
+
"/tmp/unay_data",
|
| 79 |
+
]
|
| 80 |
+
for cand in cands:
|
| 81 |
+
if not cand:
|
| 82 |
+
continue
|
| 83 |
+
try:
|
| 84 |
+
os.makedirs(cand, exist_ok=True)
|
| 85 |
+
testf = os.path.join(cand, ".wtest")
|
| 86 |
+
with open(testf, "w") as f:
|
| 87 |
+
f.write("ok")
|
| 88 |
+
os.remove(testf)
|
| 89 |
+
return cand
|
| 90 |
+
except Exception:
|
| 91 |
+
continue
|
| 92 |
+
return "/tmp"
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _restore_snapshot(lmdb_path: str, ns: str) -> bool:
|
| 96 |
+
"""Seed the live LMDB dir from a repo-committed snapshot, if one exists.
|
| 97 |
+
|
| 98 |
+
On a Space without persistent storage, the repo is the only thing that
|
| 99 |
+
survives a rebuild. We commit a snapshot of the LMDB (data.mdb) into the repo
|
| 100 |
+
under _unay_snapshot/khipu_lmdb_<ns>/ at deploy time; at BOOT this copies it
|
| 101 |
+
into the live (ephemeral) data dir so the receipts are present after every
|
| 102 |
+
rebuild. This is real durable persistence backed by git object storage.
|
| 103 |
+
"""
|
| 104 |
+
snap = os.path.join(_HERE, "_unay_snapshot", f"khipu_lmdb_{ns}")
|
| 105 |
+
src = os.path.join(snap, "data.mdb")
|
| 106 |
+
dst = os.path.join(lmdb_path, "data.mdb")
|
| 107 |
+
try:
|
| 108 |
+
if os.path.exists(src) and not os.path.exists(dst):
|
| 109 |
+
os.makedirs(lmdb_path, exist_ok=True)
|
| 110 |
+
import shutil
|
| 111 |
+
shutil.copy(src, dst)
|
| 112 |
+
return True
|
| 113 |
+
except Exception:
|
| 114 |
+
pass
|
| 115 |
+
return False
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def register(app: Any, ns: str = "szl") -> Dict[str, Any]:
|
| 119 |
+
"""Mount UNAY + Khipu-LMDB v2 routes on `app` under /api/{ns}/v2/*."""
|
| 120 |
+
if Request is None:
|
| 121 |
+
return {"registered": False, "reason": "fastapi not available"}
|
| 122 |
+
|
| 123 |
+
data_dir = _pick_data_dir()
|
| 124 |
+
unay_path = os.path.join(data_dir, f"unay_{ns}.sqlite")
|
| 125 |
+
lmdb_path = os.path.join(data_dir, f"khipu_lmdb_{ns}")
|
| 126 |
+
repl_path = os.path.join(data_dir, f"khipu_repl_{ns}")
|
| 127 |
+
|
| 128 |
+
# Seed the LMDB from a repo-committed snapshot (survives rebuilds where the
|
| 129 |
+
# filesystem does not). No-op if no snapshot is present in the repo.
|
| 130 |
+
snapshot_restored = _restore_snapshot(lmdb_path, ns)
|
| 131 |
+
|
| 132 |
+
store = szl_unay.UnayStore(path=unay_path, organ="unay", ns=ns)
|
| 133 |
+
klmdb = szl_khipu_lmdb.KhipuLMDB(path=lmdb_path, organ="khipu", ns=ns)
|
| 134 |
+
# inbound replication mirror — separate LMDB, never spliced into primary
|
| 135 |
+
repl_store = szl_khipu_lmdb.KhipuLMDB(path=repl_path, organ="khipu-repl", ns=ns)
|
| 136 |
+
|
| 137 |
+
def _source_fn(since_seq: int) -> List[Dict[str, Any]]:
|
| 138 |
+
out = []
|
| 139 |
+
total = klmdb.stats()["entries"]
|
| 140 |
+
for s in range(since_seq, total):
|
| 141 |
+
r = klmdb.get_by_seq(s)
|
| 142 |
+
if r:
|
| 143 |
+
out.append(r)
|
| 144 |
+
return out
|
| 145 |
+
|
| 146 |
+
replicator = szl_khipu_replicate.Replicator(
|
| 147 |
+
local_organ=ns, peers=[], source_fn=_source_fn
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
+
# ---------------------------------------------------------------- UNAY
|
| 151 |
+
@app.get(f"/api/{ns}/v2/unay/healthz")
|
| 152 |
+
async def unay_healthz() -> Any: # noqa: ANN401
|
| 153 |
+
return JSONResponse({"ok": True, "version": szl_unay.__version__, "organ": "unay", "ns": ns})
|
| 154 |
+
|
| 155 |
+
@app.get(f"/api/{ns}/v2/unay/stats")
|
| 156 |
+
async def unay_stats() -> Any: # noqa: ANN401
|
| 157 |
+
return JSONResponse(store.stats())
|
| 158 |
+
|
| 159 |
+
@app.post(f"/api/{ns}/v2/unay/remember")
|
| 160 |
+
async def unay_remember(request: Request) -> Any: # noqa: ANN401
|
| 161 |
+
body = await request.json()
|
| 162 |
+
text = body.get("text", "")
|
| 163 |
+
if not text:
|
| 164 |
+
return JSONResponse({"error": "text required"}, status_code=400)
|
| 165 |
+
rcpt = store.remember(text, meta=body.get("meta") or {}, vector=body.get("vector"))
|
| 166 |
+
return JSONResponse(rcpt)
|
| 167 |
+
|
| 168 |
+
async def _do_recall(query: str, k: int) -> Any:
|
| 169 |
+
if not query:
|
| 170 |
+
return JSONResponse({"error": "q/query required"}, status_code=400)
|
| 171 |
+
results = store.recall(query, k=k)
|
| 172 |
+
return JSONResponse({"query": query, "k": k, "results": results, "backend": store.backend})
|
| 173 |
+
|
| 174 |
+
@app.post(f"/api/{ns}/v2/unay/recall")
|
| 175 |
+
async def unay_recall_post(request: Request) -> Any: # noqa: ANN401
|
| 176 |
+
body = await request.json()
|
| 177 |
+
query = body.get("q") or body.get("query") or ""
|
| 178 |
+
return await _do_recall(query, int(body.get("k", 5)))
|
| 179 |
+
|
| 180 |
+
@app.get(f"/api/{ns}/v2/unay/recall")
|
| 181 |
+
async def unay_recall_get(q: str = "", k: int = 5) -> Any: # noqa: ANN401
|
| 182 |
+
return await _do_recall(q, int(k))
|
| 183 |
+
|
| 184 |
+
@app.get(f"/api/{ns}/v2/unay/verify")
|
| 185 |
+
async def unay_verify() -> Any: # noqa: ANN401
|
| 186 |
+
return JSONResponse(store.verify_chain())
|
| 187 |
+
|
| 188 |
+
# ---------------------------------------------------------- KHIPU-LMDB
|
| 189 |
+
@app.get(f"/api/{ns}/v2/khipu/lmdb/stats")
|
| 190 |
+
async def khipu_lmdb_stats() -> Any: # noqa: ANN401
|
| 191 |
+
return JSONResponse(klmdb.stats())
|
| 192 |
+
|
| 193 |
+
@app.post(f"/api/{ns}/v2/khipu/lmdb/append")
|
| 194 |
+
async def khipu_lmdb_append(request: Request) -> Any: # noqa: ANN401
|
| 195 |
+
body = await request.json()
|
| 196 |
+
action = body.get("action", "note")
|
| 197 |
+
rcpt = klmdb.append(action, payload=body.get("payload") or {})
|
| 198 |
+
return JSONResponse(rcpt)
|
| 199 |
+
|
| 200 |
+
@app.get(f"/api/{ns}/v2/khipu/lmdb/verify")
|
| 201 |
+
async def khipu_lmdb_verify() -> Any: # noqa: ANN401
|
| 202 |
+
return JSONResponse(klmdb.verify())
|
| 203 |
+
|
| 204 |
+
@app.get(f"/api/{ns}/v2/khipu/lmdb/tail")
|
| 205 |
+
async def khipu_lmdb_tail(n: int = 10) -> Any: # noqa: ANN401
|
| 206 |
+
return JSONResponse({"tail": klmdb.tail(int(n))})
|
| 207 |
+
|
| 208 |
+
# ----------------------------------------------------------- REPLICATION
|
| 209 |
+
@app.post(f"/api/{ns}/v2/khipu/replicate")
|
| 210 |
+
async def khipu_replicate(request: Request) -> Any: # noqa: ANN401
|
| 211 |
+
body = await request.json()
|
| 212 |
+
receipts = body.get("receipts", [])
|
| 213 |
+
accepted, rejected = 0, 0
|
| 214 |
+
for r in receipts:
|
| 215 |
+
if szl_khipu_replicate.verify_inbound(r):
|
| 216 |
+
if repl_store.get_by_receipt(r["digest"]) is None:
|
| 217 |
+
repl_store.append("replicated", payload={"origin": r})
|
| 218 |
+
accepted += 1
|
| 219 |
+
else:
|
| 220 |
+
rejected += 1
|
| 221 |
+
return JSONResponse(
|
| 222 |
+
{"accepted": accepted, "rejected": rejected, "mirror_entries": repl_store.stats()["entries"]}
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
@app.get(f"/api/{ns}/v2/khipu/replicate/status")
|
| 226 |
+
async def khipu_replicate_status() -> Any: # noqa: ANN401
|
| 227 |
+
return JSONResponse(replicator.status())
|
| 228 |
+
|
| 229 |
+
return {
|
| 230 |
+
"registered": True,
|
| 231 |
+
"ns": ns,
|
| 232 |
+
"data_dir": data_dir,
|
| 233 |
+
"unay_backend": store.backend,
|
| 234 |
+
"lmdb_version": ".".join(str(x) for x in __import__("lmdb").version()),
|
| 235 |
+
"replication_enabled": replicator.enabled,
|
| 236 |
+
"snapshot_restored": snapshot_restored,
|
| 237 |
+
"lmdb_entries_at_boot": klmdb.stats()["entries"],
|
| 238 |
+
}
|