Spaces:

Optitransfer
/

crdt-merge-data

Sleeping

App Files Files Community

crdt-merge-data / app.py

gille1983

Integrate E4 trust output into all demo tabs

8350470 about 1 month ago

raw

history blame contribute delete

33.5 kB

	# SPDX-License-Identifier: BUSL-1.1
	# Copyright 2026 Ryan Gillespie / Optitransfer
	#
	# Licensed under the Business Source License 1.1 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# https://github.com/mgillr/crdt-merge/blob/main/LICENSE
	#
	# Change Date: 2028-03-29
	# Change License: Apache License, Version 2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# On 2028-03-29 this file converts to Apache License, Version 2.0.

	"""
	crdt-merge v0.9.5 — Data Playground HuggingFace Space
	Tabular CRDT merge, conflict analysis, and core primitive demonstrations.
	"""

	import os
	import json
	import time
	import numpy as np
	import gradio as gr
	import plotly.graph_objects as go

	# Custom stylesheet handed to gr.Blocks(css=CSS): dark background, blue primary
	# buttons, hidden footer, and restyled tab navigation, inputs and tables.
	CSS = """
	.gradio-container { background: #09090b !important; font-family: 'Inter', system-ui, sans-serif !important; }
	.gr-button-primary { background: linear-gradient(135deg, #2563eb, #1d4ed8) !important; border: none !important; color: #fff !important; font-weight: 600 !important; }
	footer { display: none !important; }
	.tab-nav button { color: #a1a1aa !important; font-size: 13px !important; letter-spacing: 0.05em !important; text-transform: uppercase !important; font-weight: 600 !important; padding: 10px 16px !important; }
	.tab-nav button.selected { color: #f4f4f5 !important; border-bottom: 2px solid #3b82f6 !important; }
	.tab-nav button:hover { color: #e4e4e7 !important; }
	code, .monospace { font-family: 'JetBrains Mono', ui-monospace, monospace !important; font-size: 13px !important; }
	h1, h2, h3 { color: #f4f4f5 !important; }
	p, li { color: #d4d4d8 !important; font-size: 15px !important; line-height: 1.7 !important; }
	label, .gr-input-label, .label-wrap span { color: #e4e4e7 !important; font-size: 14px !important; font-weight: 500 !important; }
	input, textarea, select, .gr-box { color: #f4f4f5 !important; background: #18181b !important; border-color: #3f3f46 !important; }
	.gr-dataframe th, table th { color: #f4f4f5 !important; background: #18181b !important; font-weight: 600 !important; font-size: 13px !important; }
	.gr-dataframe td, table td { color: #d4d4d8 !important; font-size: 13px !important; border-color: #27272a !important; }
	.gr-dataframe tr:hover td { background: #1e1e22 !important; }
	.gr-info, .info { color: #a1a1aa !important; font-size: 12px !important; }
	strong { color: #f4f4f5 !important; }
	"""

	# Shared dark-theme layout defaults, unpacked into every `fig.update_layout(...)` call.
	_AXIS_STYLE = {"gridcolor": "#27272a", "linecolor": "#27272a"}
	PLOTLY_LAYOUT = {
	    "paper_bgcolor": "#09090b",
	    "plot_bgcolor": "#18181b",
	    "font": {"color": "#a1a1aa", "family": "Inter"},
	    # Each axis gets its own copy so the two entries never alias each other.
	    "xaxis": dict(_AXIS_STYLE),
	    "yaxis": dict(_AXIS_STYLE),
	    "margin": {"l": 60, "r": 20, "t": 40, "b": 60},
	}

	# Gradio base theme using the blue primary hue and the zinc neutral hue.
	THEME = gr.themes.Base(
	primary_hue=gr.themes.colors.blue,
	neutral_hue=gr.themes.colors.zinc,
	)

	# Navigation links (sibling Spaces, GitHub, PyPI) rendered as the first Markdown element.
	NAV_MD = """[🏠 Flagship](https://huggingface.co/spaces/optitransfer/crdt-merge) · [🔬 Data Playground](https://huggingface.co/spaces/optitransfer/crdt-merge-data) · [🌐 Federation](https://huggingface.co/spaces/optitransfer/crdt-merge-federation) · [GitHub ↗](https://github.com/mgillr/crdt-merge) · [⭐ Star Repo](https://github.com/mgillr/crdt-merge/stargazers) · [👁️ Watch](https://github.com/mgillr/crdt-merge/subscription) · [📐 Architecture Deep Dive](https://github.com/mgillr/crdt-merge/tree/main/docs/architecture) · [PyPI ↗](https://pypi.org/project/crdt-merge/)"""

	# Title and one-paragraph introduction rendered directly after NAV_MD.
	HERO_MD = """
	# crdt-merge — Data Playground

	Tabular CRDT merge for DataFrames and datasets. Conflict-free record merge, deduplication, and provenance tracking.

	`pip install crdt-merge` · [GitHub](https://github.com/mgillr/crdt-merge) · [PyPI](https://pypi.org/project/crdt-merge/) · Patent UK 2607132.4, GB2608127.3 · E4 Trust-Delta Architecture
	"""

	# Strategy names offered by the "Merge Strategy" dropdown; run_dataset_merge
	# looks each one up in its own strategy_map.
	STRATEGIES_DF = ["LWW", "MaxWins", "MinWins", "Union"]


	# -----------------------------------------------------------------
	# Data loading
	# -----------------------------------------------------------------

	def _load_dataset_records():
	"""Try HF datasets first, fallback to synthetic."""
	source = "synthetic"
	records_a = []
	records_b = []

	try:
	from datasets import load_dataset
	ds = load_dataset("glue", "sst2", split="train[:200]")
	all_records = [{"id": i, "sentence": ds[i]["sentence"], "label": ds[i]["label"], "_ts": i}
	for i in range(len(ds))]
	records_a = all_records[:150]
	# Node B: overlapping records (100-149) get modified values + later timestamps
	records_b = []
	for r in all_records[100:]:
	rid = r["id"]
	if rid < 150: # overlapping region -- simulate a different node's edits
	records_b.append({
	"id": rid,
	"sentence": r["sentence"].strip() + " [node-B edit]",
	"label": 1 - r["label"], # flip label to create real conflict
	"_ts": rid + 50, # later timestamp for LWW
	})
	else:
	records_b.append(r)
	source = "glue/sst2 (HuggingFace datasets, 200 rows, 50 conflicting overlap)"
	except Exception:
	pass

	if not records_a:
	rng = np.random.RandomState(7)
	adjectives = ["good", "bad", "great", "poor", "excellent", "terrible", "fine", "awful"]
	nouns = ["film", "movie", "picture", "show", "performance", "script", "cast", "story"]
	for i in range(200):
	adj = adjectives[i % len(adjectives)]
	noun = nouns[i % len(nouns)]
	records_a.append({"id": i, "sentence": f"A {adj} {noun}.", "label": i % 2, "_ts": i})
	for i in range(100, 200):
	adj = adjectives[(i + 3) % len(adjectives)]
	noun = nouns[(i + 2) % len(nouns)]
	records_b.append({"id": i, "sentence": f"An {adj} {noun}.", "label": (i + 1) % 2, "_ts": i + 50})
	for i in range(200, 250):
	adj = adjectives[i % len(adjectives)]
	noun = nouns[i % len(nouns)]
	records_b.append({"id": i, "sentence": f"The {adj} {noun}.", "label": i % 2, "_ts": i})
	source = "synthetic (SST-2 style, 150 + 100 records with 50 overlap)"

	return records_a, records_b, source


	# -----------------------------------------------------------------
	# TAB 1 -- Dataset Merge
	# -----------------------------------------------------------------

	def _trust_status_label(score):
	    """Return the display label for a trust score (<= 0.5 counts as probationary)."""
	    return "Probationary" if score.overall_trust() <= 0.5 else "Trusted"


	def _e4_merge_trust_md(records_a, records_b, merged):
	    """Render the E4 trust section (trust table + Merkle root) for a finished merge.

	    Never raises: any failure is reported inside the returned Markdown.
	    """
	    try:
	        from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice
	        from crdt_merge.e4.trust_bound_merkle import TrustBoundMerkle

	        ids_a = {r["id"] for r in records_a}
	        ts_b = {r["id"]: r.get("_ts") for r in records_b}

	        lattice_a = DeltaTrustLattice(peer_id="node_A")
	        lattice_b = DeltaTrustLattice(peer_id="node_B")

	        score_a_self = lattice_a.get_trust("node_A")
	        score_b_self = lattice_b.get_trust("node_B")
	        score_a_from_b = lattice_b.get_trust("node_A")
	        score_b_from_a = lattice_a.get_trust("node_B")

	        merkle = TrustBoundMerkle(trust_lattice=lattice_a)
	        for r in merged:
	            rid = r["id"]
	            if rid not in ts_b:
	                originator = "node_A"
	            elif rid not in ids_a:
	                originator = "node_B"
	            else:
	                # Overlapping id: attribute the row to node B when the merged
	                # timestamp is node B's, instead of relying on a hard-coded
	                # timestamp threshold tied to one particular data layout.
	                originator = "node_B" if r.get("_ts") == ts_b[rid] else "node_A"
	            merkle.insert_leaf(
	                key=str(rid),
	                data=json.dumps(r, default=str).encode(),
	                originator=originator,
	            )
	        root_hash = merkle.recompute()

	        return f"""
	---
	### E4 Trust Layer

	| Peer | Lattice | Overall Trust | Status |
	|------|---------|--------------|--------|
	| node_A | node_A (self) | {score_a_self.overall_trust():.3f} | {_trust_status_label(score_a_self)} |
	| node_B | node_B (self) | {score_b_self.overall_trust():.3f} | {_trust_status_label(score_b_self)} |
	| node_A | node_B (cross) | {score_a_from_b.overall_trust():.3f} | {_trust_status_label(score_a_from_b)} |
	| node_B | node_A (cross) | {score_b_from_a.overall_trust():.3f} | {_trust_status_label(score_b_from_a)} |

	Merkle Provenance Root: `{root_hash}`
	Merged records in Merkle tree: {len(merged)}
	Trust scoring: All merge participants start at probationary (0.5) trust. Trust accrues over time via successful merges and evidence accumulation.
	"""
	    except Exception as e:
	        return f"\n\n---\n### E4 Trust Layer\n\nE4 trust module unavailable: {e}\n"


	def run_dataset_merge(strategy_name: str):
	    """Merge the two demo partitions with the chosen strategy.

	    Args:
	        strategy_name: One of ``"LWW"``, ``"MaxWins"``, ``"MinWins"``, ``"Union"``.

	    Returns:
	        tuple: ``(display_rows, summary_md)`` where ``display_rows`` is the
	        first 20 merged records and ``summary_md`` is a Markdown report
	        (merge metrics followed by the E4 trust section). On failure the
	        result is ``([], "Error: ...")``.
	    """
	    from crdt_merge.dataframe import merge as df_merge
	    from crdt_merge.strategies import MergeSchema, LWW, MaxWins, MinWins, UnionSet

	    strategy_map = {
	        "LWW": LWW(),
	        "MaxWins": MaxWins(),
	        "MinWins": MinWins(),
	        "Union": UnionSet(),
	    }
	    if strategy_name not in strategy_map:
	        # Report through the same channel as merge failures instead of raising KeyError.
	        return [], f"Error: unknown strategy {strategy_name!r}"
	    schema = MergeSchema(default=strategy_map[strategy_name])

	    records_a, records_b, source = _load_dataset_records()
	    t0 = time.perf_counter()

	    try:
	        merged = df_merge(records_a, records_b, key="id", schema=schema, timestamp_col="_ts")
	        elapsed = (time.perf_counter() - t0) * 1000

	        # Verify commutativity on full records (keyed by id), not just on the
	        # id lists: two merges can agree on ids while disagreeing on values.
	        merged_ba = df_merge(records_b, records_a, key="id", schema=schema, timestamp_col="_ts")
	        by_id_ab = {r["id"]: r for r in merged}
	        by_id_ba = {r["id"]: r for r in merged_ba}
	        comm_pass = len(merged) == len(merged_ba) and by_id_ab == by_id_ba

	        overlap_count = len({r["id"] for r in records_a} & {r["id"] for r in records_b})

	        summary_md = f"""
	Dataset Merge Complete

	| Metric | Value |
	|---|---|
	| Source | {source} |
	| Strategy | {strategy_name} |
	| Node A records | {len(records_a)} |
	| Node B records | {len(records_b)} |
	| Overlapping IDs | {overlap_count} |
	| Merged records | {len(merged)} |
	| Elapsed | {elapsed:.1f}ms |
	| Commutative (merge_AB == merge_BA) | {"PASS" if comm_pass else "FAIL"} |

	### Understanding the Results

	- Merged Records Table: Shows the first 20 rows after merging Node A and Node B. For overlapping record IDs (where both nodes have the same row but different values), the selected strategy decides which value wins.
	- Strategy Behavior:
	- `LWW` (Last-Writer-Wins) — the record with the later timestamp (`_ts`) wins. This is the most common strategy in distributed databases.
	- `MaxWins` — for numeric fields, the larger value wins. For text, lexicographic max.
	- `MinWins` — the smaller value wins. Useful for minimum-bid auctions or earliest-deadline scenarios.
	- `Union` — keeps all values as a set (no data is lost, but deduplication may be needed downstream).
	- Commutativity PASS means `merge(A, B)` and `merge(B, A)` produce identical results — a core CRDT guarantee. This ensures any two replicas performing the merge get the same output regardless of order.
	"""

	        # E4 Trust Layer -- trust scores and Merkle provenance for the merge.
	        summary_md = summary_md + _e4_merge_trust_md(records_a, records_b, merged)

	        return merged[:20], summary_md

	    except Exception as e:
	        return [], f"Error: {e}"


	# -----------------------------------------------------------------
	# TAB 2 -- Conflict Analysis
	# -----------------------------------------------------------------

	def run_conflict_analysis():
	    """Merge the demo partitions under every strategy and compare the outcomes.

	    Returns:
	        tuple: ``(summary_rows, fig)``. ``summary_rows`` is a list of dicts
	        (one per strategy) counting overlapping records resolved differently
	        from LWW; ``fig`` is a Plotly heatmap of per-field disagreement rates
	        between every pair of strategies.
	    """
	    from crdt_merge.dataframe import merge as df_merge
	    from crdt_merge.strategies import MergeSchema, LWW, MaxWins, MinWins, UnionSet

	    records_a, records_b, source = _load_dataset_records()
	    overlap_ids = {rec["id"] for rec in records_a} & {rec["id"] for rec in records_b}

	    strategy_map = {
	        "LWW": LWW(),
	        "MaxWins": MaxWins(),
	        "MinWins": MinWins(),
	        "Union": UnionSet(),
	    }
	    fields = ["sentence", "label"]

	    def _as_text(record, field):
	        # Values are compared through their string form; a missing field counts as "".
	        return str(record.get(field, ""))

	    # One merge per strategy; only overlapping records are kept, indexed by id.
	    # A strategy whose merge fails contributes an empty mapping.
	    results_by_strategy = {}
	    for label, strategy in strategy_map.items():
	        schema = MergeSchema(default=strategy)
	        try:
	            outcome = df_merge(records_a, records_b, key="id", schema=schema, timestamp_col="_ts")
	            results_by_strategy[label] = {
	                rec["id"]: rec for rec in outcome if rec["id"] in overlap_ids
	            }
	        except Exception:
	            results_by_strategy[label] = {}

	    # Conflict matrix: per field, per strategy pair, the share of records that differ.
	    strat_names = list(strategy_map)
	    size = len(strat_names)
	    conflict_matrix = {}
	    for field in fields:
	        grid = np.zeros((size, size), dtype=np.float32)
	        for row, first in enumerate(strat_names):
	            for col, second in enumerate(strat_names):
	                if row != col:
	                    compared = 0
	                    differing = 0
	                    for rid in overlap_ids:
	                        left = results_by_strategy[first].get(rid)
	                        right = results_by_strategy[second].get(rid)
	                        if left is None or right is None:
	                            continue
	                        compared += 1
	                        if _as_text(left, field) != _as_text(right, field):
	                            differing += 1
	                    grid[row, col] = differing / max(compared, 1)
	        conflict_matrix[field] = grid

	    # Heatmap: the per-field matrices are laid out side by side.
	    combined_z = np.concatenate([conflict_matrix[f] for f in fields], axis=1)
	    col_labels = [f"{f}:{s}" for f in fields for s in strat_names]

	    heatmap = go.Heatmap(
	        z=combined_z.tolist(),
	        x=col_labels,
	        y=strat_names,
	        colorscale=[[0, "#18181b"], [1, "#3b82f6"]],
	        showscale=True,
	        colorbar=dict(title="Conflict Rate"),
	    )
	    fig = go.Figure(data=heatmap)
	    fig.update_layout(
	        **PLOTLY_LAYOUT,
	        title=f"Per-Field Conflict Matrix — Strategy vs Strategy (source: {source[:40]}...)",
	        xaxis_title="Field : Strategy (column)",
	        yaxis_title="Strategy (row)",
	    )

	    # Summary table: overlapping records each strategy resolves differently from LWW.
	    baseline = results_by_strategy["LWW"]
	    overlap_total = len(overlap_ids)
	    summary_rows = []
	    for label in strat_names:
	        candidate = results_by_strategy[label]
	        diffs_vs_lww = 0
	        for rid in overlap_ids:
	            base_rec = baseline.get(rid)
	            other_rec = candidate.get(rid)
	            # A record counts once, no matter how many of its fields differ.
	            if base_rec and other_rec and any(
	                _as_text(base_rec, field) != _as_text(other_rec, field) for field in fields
	            ):
	                diffs_vs_lww += 1
	        summary_rows.append({
	            "Strategy": label,
	            "Conflicts vs LWW": diffs_vs_lww,
	            "Overlap Records": overlap_total,
	            "Conflict Rate": f"{diffs_vs_lww / max(overlap_total, 1):.2%}",
	        })

	    return summary_rows, fig


	def _e4_conflict_trust_analysis():
	    """Build the E4 "conflict evidence" Markdown section for the demo data.

	    For every id present on both nodes, a diverging sentence is logged as an
	    ``equivocation`` event and a diverging label as an ``invalid_delta``
	    event, both targeting ``node_B``. At most the first 10 events are listed.

	    Returns:
	        str: Markdown text. Never raises; any failure (for example the E4
	        modules being unavailable) is reported inside the returned text.
	    """
	    try:
	        from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice
	        from crdt_merge.e4.proof_evidence import TrustEvidence

	        records_a, records_b, _source = _load_dataset_records()
	        map_a = {r["id"]: r for r in records_a}
	        map_b = {r["id"]: r for r in records_b}
	        overlap_ids = map_a.keys() & map_b.keys()

	        lattice = DeltaTrustLattice(peer_id="auditor")

	        evidence_log = []
	        equivocation_count = 0
	        invalid_delta_count = 0

	        # NOTE(review): the evidence objects created below are never handed to
	        # `lattice`, so the "post-evidence" scores further down are presumably
	        # just the lattice defaults. Confirm how DeltaTrustLattice records
	        # evidence and apply each event here.
	        for rid in sorted(overlap_ids):
	            ra = map_a[rid]
	            rb = map_b[rid]

	            # Same key, different sentence = equivocation evidence.
	            if str(ra.get("sentence", "")) != str(rb.get("sentence", "")):
	                TrustEvidence.create(
	                    observer="auditor",
	                    target="node_B",
	                    evidence_type="equivocation",
	                    dimension="consistency",
	                    amount=-0.05,
	                    proof=f"id={rid} sentence diverged".encode(),
	                )
	                evidence_log.append(("equivocation", "node_B", rid, "consistency"))
	                equivocation_count += 1

	            # Label flip = invalid_delta evidence.
	            if ra.get("label") != rb.get("label"):
	                TrustEvidence.create(
	                    observer="auditor",
	                    target="node_B",
	                    evidence_type="invalid_delta",
	                    dimension="integrity",
	                    amount=-0.1,
	                    proof=f"id={rid} label flipped {ra.get('label')}->{rb.get('label')}".encode(),
	                )
	                evidence_log.append(("invalid_delta", "node_B", rid, "integrity"))
	                invalid_delta_count += 1

	        score_a = lattice.get_trust("node_A")
	        score_b = lattice.get_trust("node_B")
	        status_a = "Probationary" if score_a.overall_trust() <= 0.5 else "Trusted"
	        status_b = "Probationary" if score_b.overall_trust() <= 0.5 else "Trusted"

	        # Evidence table: first 10 events, then an ellipsis row and the total.
	        verdict_rows = [
	            f"| {ev_type} | {target} | {rid} | {dim} |"
	            for ev_type, target, rid, dim in evidence_log[:10]
	        ]
	        if len(evidence_log) > 10:
	            verdict_rows.append("| ... | ... | ... | ... |")
	            verdict_rows.append(f"| (total {len(evidence_log)} events) | | | |")
	        verdict_table = "\n".join(verdict_rows)

	        return f"""
	---
	### E4 Trust Layer -- Conflict Evidence

	Evidence Events Fired: {len(evidence_log)} total ({equivocation_count} equivocation, {invalid_delta_count} invalid_delta)

	| Evidence Type | Target | Record ID | Dimension |
	|--------------|--------|-----------|-----------|
	{verdict_table}

	Post-Evidence Trust Scores:

	| Peer | Overall Trust | Verdict |
	|------|--------------|---------|
	| node_A | {score_a.overall_trust():.3f} | {status_a} -- no negative evidence |
	| node_B | {score_b.overall_trust():.3f} | {status_b} -- {len(evidence_log)} evidence events filed |

	Interpretation: Conflicts between nodes degrade trust for the conflicting peer. The trust lattice records evidence so downstream consumers can make trust-aware merge decisions (e.g., reject merges from peers below a trust threshold).
	"""

	    except Exception as e:
	        return f"\n\n---\n### E4 Trust Layer -- Conflict Evidence\n\nE4 trust module unavailable: {e}\n"


	# -----------------------------------------------------------------
	# TAB 3 -- Core CRDT Primitives
	# -----------------------------------------------------------------

	def run_primitives_demo():
	    """Exercise GCounter, PNCounter, LWWRegister and ORSet on two replicas.

	    Each primitive is mutated independently on a "node A" and a "node B"
	    instance and then merged in both directions.

	    Returns:
	        list[dict]: One row per primitive with the keys ``Primitive``,
	        ``Node A Operations``, ``Node B Operations``, ``merge(A,B) Value``,
	        ``merge(B,A) Value`` and ``Commutative`` (``"PASS"`` when both merge
	        directions yield the same value, otherwise ``"FAIL"``).
	    """
	    from crdt_merge.core import GCounter, PNCounter, LWWRegister, ORSet

	    rows = []

	    def _record(primitive, ops_a, ops_b, value_ab, value_ba):
	        # Append one table row; the values are compared before being stringified.
	        rows.append({
	            "Primitive": primitive,
	            "Node A Operations": ops_a,
	            "Node B Operations": ops_b,
	            "merge(A,B) Value": str(value_ab),
	            "merge(B,A) Value": str(value_ba),
	            "Commutative": "PASS" if value_ab == value_ba else "FAIL",
	        })

	    # GCounter
	    gc_a = GCounter()
	    gc_a.increment("node_A", 5)
	    gc_a.increment("node_A", 3)
	    gc_b = GCounter()
	    gc_b.increment("node_B", 7)
	    _record(
	        "GCounter",
	        "gc_a.increment('node_A', 5); gc_a.increment('node_A', 3) # value=8",
	        "gc_b.increment('node_B', 7) # value=7",
	        gc_a.merge(gc_b).value,
	        gc_b.merge(gc_a).value,
	    )

	    # PNCounter -- each replica writes under its own id. Sharing one id
	    # between both replicas would let the per-replica max merge silently
	    # drop the smaller replica's increments.
	    pn_a = PNCounter()
	    pn_a.increment("node_A", 10)
	    pn_a.decrement("node_A", 3)
	    pn_b = PNCounter()
	    pn_b.increment("node_B", 5)
	    _record(
	        "PNCounter",
	        "pn_a.increment('node_A', 10); pn_a.decrement('node_A', 3) # value=7",
	        "pn_b.increment('node_B', 5) # value=5",
	        pn_a.merge(pn_b).value,
	        pn_b.merge(pn_a).value,
	    )

	    # LWWRegister
	    lww_a = LWWRegister()
	    lww_a.set("model_v1", timestamp=1.0)
	    lww_a.set("model_v2", timestamp=3.0)
	    lww_b = LWWRegister()
	    lww_b.set("model_v3", timestamp=2.0)
	    _record(
	        "LWWRegister",
	        "lww_a.set('model_v1', timestamp=1.0); lww_a.set('model_v2', timestamp=3.0)",
	        "lww_b.set('model_v3', timestamp=2.0)",
	        str(lww_a.merge(lww_b).value),
	        str(lww_b.merge(lww_a).value),
	    )

	    # ORSet -- values are sorted so the comparison ignores iteration order.
	    orset_a = ORSet()
	    orset_a.add("alpha")
	    orset_a.add("beta")
	    orset_a.add("gamma")
	    orset_b = ORSet()
	    orset_b.add("beta")
	    orset_b.add("delta")
	    _record(
	        "ORSet",
	        "orset_a.add('alpha'); orset_a.add('beta'); orset_a.add('gamma')",
	        "orset_b.add('beta'); orset_b.add('delta')",
	        sorted(orset_a.merge(orset_b).value),
	        sorted(orset_b.merge(orset_a).value),
	    )

	    return rows


	def _e4_primitives_trust():
	    """Build the E4 "primitive-level trust" Markdown section.

	    Demonstrates three E4 components: two ``CausalTrustClock`` instances that
	    are incremented and merged, a ``TrustBoundMerkle`` tree holding one leaf
	    per demo CRDT operation, and the wire encoding of an
	    ``AggregateProofCarryingOperation`` built from placeholder hash and
	    signature bytes.

	    Returns:
	        str: Markdown text. Never raises; any failure (for example the E4
	        modules being unavailable) is reported inside the returned text.
	    """
	    try:
	        from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice
	        from crdt_merge.e4.trust_bound_merkle import TrustBoundMerkle
	        from crdt_merge.e4.causal_trust_clock import CausalTrustClock
	        from crdt_merge.e4.pco import AggregateProofCarryingOperation

	        # CausalTrustClock demo: increment() returns the advanced clock, so
	        # the result is rebound on every step.
	        clock_a = CausalTrustClock(peer_id="node_A")
	        for _ in range(3):
	            clock_a = clock_a.increment()
	        clock_b = CausalTrustClock(peer_id="node_B")
	        for _ in range(2):
	            clock_b = clock_b.increment()

	        clock_a_time = clock_a.logical_time
	        clock_b_time = clock_b.logical_time
	        clock_merged_time = clock_a.merge(clock_b).logical_time

	        # Trust-bound Merkle tree wrapping the primitive operations.
	        lattice = DeltaTrustLattice(peer_id="node_A")
	        merkle = TrustBoundMerkle(trust_lattice=lattice)

	        # (leaf key, payload, originator)
	        ops = [
	            ("gcounter_inc_A", b"increment(node_A, 5)", "node_A"),
	            ("gcounter_inc_B", b"increment(node_B, 7)", "node_B"),
	            ("pncounter_inc", b"increment(n, 10)", "node_A"),
	            ("pncounter_dec", b"decrement(n, 3)", "node_A"),
	            ("lww_set_v1", b"set(model_v1, ts=1.0)", "node_A"),
	            ("lww_set_v3", b"set(model_v3, ts=2.0)", "node_B"),
	            ("orset_add_alpha", b"add(alpha)", "node_A"),
	            ("orset_add_delta", b"add(delta)", "node_B"),
	        ]
	        for key, data, orig in ops:
	            merkle.insert_leaf(key=key, data=data, originator=orig)

	        merkle_root = merkle.recompute()

	        # PCO wire format; the hash and signature are zero-filled placeholders.
	        pco = AggregateProofCarryingOperation(
	            aggregate_hash=b'\x00' * 32,
	            signature=b'\x00' * 64,
	            originator_id="node_A",
	            metadata=b'{"ops": 8}',
	            merkle_root_at_creation=str(merkle_root),
	            clock_snapshot=b'\x03',
	            trust_vector_hash="tvh_demo",
	            delta_bounds=(),
	        )
	        wire_size = len(pco.to_wire())

	        return f"""
	---
	### E4 Trust Layer -- Primitive-Level Trust

	#### CausalTrustClock

	| Clock | Operations | Logical Time |
	|-------|-----------|-------------|
	| node_A | 3 increments | {clock_a_time} |
	| node_B | 2 increments | {clock_b_time} |
	| merged(A, B) | merge | {clock_merged_time} |

	Causal trust clocks are immutable -- each `increment()` returns a new clock instance. The merged clock captures the causal frontier of both peers.

	#### Trust-Bound Merkle Tree

	| Property | Value |
	|----------|-------|
	| Leaves inserted | {len(ops)} |
	| Operations covered | GCounter, PNCounter, LWWRegister, ORSet |
	| Merkle root | `{merkle_root}` |

	Every CRDT operation is recorded as a Merkle leaf with its originator. The trust-bound Merkle tree links each leaf to the originator's trust score in the lattice, enabling per-operation provenance auditing.

	#### Proof-Carrying Operation (PCO) Wire Format

	| Property | Value |
	|----------|-------|
	| Wire size | {wire_size} bytes |
	| Originator | node_A |
	| Merkle root at creation | `{str(merkle_root)[:32]}...` |
	| Format | AggregateProofCarryingOperation |

	The PCO bundles a cryptographic proof (aggregate hash + signature), the Merkle root at time of creation, and a clock snapshot into a compact wire format suitable for gossip protocols.
	"""

	    except Exception as e:
	        return f"\n\n---\n### E4 Trust Layer -- Primitive-Level Trust\n\nE4 trust module unavailable: {e}\n"



	# -----------------------------------------------------------------
	# Gradio UI
	# -----------------------------------------------------------------

	# Page layout: navigation + hero text, three demo tabs, footer. Each tab wires
	# its button to a callback and also runs that callback once on page load.
	with gr.Blocks(theme=THEME, css=CSS, title="crdt-merge — Data Playground") as demo:
	    gr.Markdown(NAV_MD)
	    gr.Markdown(HERO_MD)

	    with gr.Tabs():

	        # -- TAB 1 --------------------------------------------------------
	        with gr.Tab("Dataset Merge"):
	            gr.Markdown("""
	## Dataset Merge

	Loads glue/sst2 from HuggingFace datasets (first 200 rows) or uses synthetic fallback.
	Splits into two node partitions with 50 overlapping records.
	Demonstrates conflict-free merge with configurable strategy.

	> E4 Trust Scoring Active (v0.9.5+): All merge operations now carry typed trust scores by default. Every record merge accumulates accuracy, consistency, recency, and provenance trust dimensions via GCounter-backed convergent accumulators. Trust propagation adds zero API overhead -- it activates transparently on `import crdt_merge`.
	""")

	            with gr.Row():
	                strat_dd = gr.Dropdown(
	                    choices=STRATEGIES_DF,
	                    value="LWW",
	                    label="Merge Strategy",
	                    info="LWW = Last Write Wins (by timestamp). MaxWins/MinWins = field max/min. Union = set union.",
	                )
	                merge_ds_btn = gr.Button("Run Dataset Merge", variant="primary")

	            merge_summary_md = gr.Markdown()
	            merge_result_table = gr.Dataframe(
	                headers=["id", "sentence", "label", "_ts"],
	                label="Merged Records (first 20 rows)",
	                wrap=True,
	            )

	            def _run_ds_merge(strategy):
	                """Run the merge and return (summary markdown, table rows)."""
	                rows, summary = run_dataset_merge(strategy)
	                # Row dicts are flattened into lists in the table's column order.
	                df_data = [
	                    [r.get("id", ""), r.get("sentence", ""), r.get("label", ""), r.get("_ts", "")]
	                    for r in rows
	                ]
	                return summary, df_data

	            merge_ds_btn.click(_run_ds_merge, inputs=[strat_dd], outputs=[merge_summary_md, merge_result_table])
	            demo.load(lambda: _run_ds_merge("LWW"), outputs=[merge_summary_md, merge_result_table])

	        # -- TAB 2 --------------------------------------------------------
	        with gr.Tab("Conflict Analysis"):
	            gr.Markdown("""
	## Conflict Analysis

	Runs the same dataset through all 4 strategies and computes per-field conflict rates
	between strategy pairs. The heatmap shows how often two strategies disagree on a record.

	### How to Read the Results

	- Conflict Rate Heatmap: Each cell shows the fraction of overlapping records where two strategies produce different values for a given field. Brighter = more disagreement. The diagonal is always 0 (a strategy agrees with itself).
	- `sentence:LWW` vs `sentence:MaxWins` = "how often do LWW and MaxWins disagree on the sentence field?"
	- High conflict rates between strategies mean the choice of strategy materially affects the merged output.
	- Comparison Table: Shows how each strategy differs from LWW (the baseline). `0 conflicts` = identical behavior for this dataset. Higher numbers indicate the strategy resolves more records differently.
	- Why this matters: In production systems, teams need to understand which strategy is appropriate for their data. If all strategies agree, the choice doesn't matter. If they diverge significantly, the strategy selection is a critical design decision.
	""")

	            with gr.Row():
	                conflict_btn = gr.Button("Run Conflict Analysis", variant="primary")

	            conflict_chart = gr.Plot(label="Per-Field Conflict Matrix Heatmap")
	            conflict_table = gr.Dataframe(
	                headers=["Strategy", "Conflicts vs LWW", "Overlap Records", "Conflict Rate"],
	                label="Strategy Comparison",
	            )
	            conflict_e4_md = gr.Markdown()

	            def _run_conflict():
	                """Return (heatmap figure, comparison rows, E4 evidence markdown)."""
	                rows, fig = run_conflict_analysis()
	                df_data = [
	                    [r["Strategy"], r["Conflicts vs LWW"], r["Overlap Records"], r["Conflict Rate"]]
	                    for r in rows
	                ]
	                e4_md = _e4_conflict_trust_analysis()
	                return fig, df_data, e4_md

	            conflict_btn.click(_run_conflict, outputs=[conflict_chart, conflict_table, conflict_e4_md])
	            demo.load(_run_conflict, outputs=[conflict_chart, conflict_table, conflict_e4_md])

	        # -- TAB 3 --------------------------------------------------------
	        with gr.Tab("Core CRDT Primitives"):
	            gr.Markdown("""
	## Core CRDT Primitives

	Live demonstration of GCounter, PNCounter, LWWRegister, and ORSet.
	Each primitive is operated on two nodes independently, then merged in both directions.
	Commutativity is verified: merge(A,B) must equal merge(B,A).

	### How to Read the Results

	| Primitive | What It Does | Merge Semantics |
	|---|---|---|
	| GCounter | Grow-only counter | Each node's count is tracked separately. Merge takes the max per node, then sums. Node A=8 + Node B=7 → merged=15. |
	| PNCounter | Increment/decrement counter | Two internal GCounters (positive + negative). Merge takes max per node for each. Net value = positives − negatives. |
	| LWWRegister | Last-Writer-Wins register | Stores a single value + timestamp. Merge keeps the value with the latest timestamp. Node A writes "model_v2" at t=3.0 > Node B's t=2.0, so A wins. |
	| ORSet | Observed-Remove Set | Add/remove elements with unique tags. Merge is the union of all adds minus confirmed removes. Both nodes' elements appear in the merged set. |

	- merge(A,B) = merge(B,A): The "Commutative" column proves this. PASS means the primitive is safe for distributed use — merge order doesn't affect the result.
	- These are the building blocks that power crdt-merge's higher-level DataFrame and model merge operations.
	""")

	            with gr.Row():
	                prim_btn = gr.Button("Run Primitives Demo", variant="primary")

	            prim_table = gr.Dataframe(
	                headers=["Primitive", "Node A Operations", "Node B Operations",
	                         "merge(A,B) Value", "merge(B,A) Value", "Commutative"],
	                label="Primitive Commutativity Proof",
	                wrap=True,
	            )
	            prim_e4_md = gr.Markdown()

	            def _run_prims():
	                """Return (primitive table rows, E4 primitive-trust markdown)."""
	                rows = run_primitives_demo()
	                table_data = [
	                    [r["Primitive"], r["Node A Operations"], r["Node B Operations"],
	                     r["merge(A,B) Value"], r["merge(B,A) Value"], r["Commutative"]]
	                    for r in rows
	                ]
	                e4_md = _e4_primitives_trust()
	                return table_data, e4_md

	            prim_btn.click(_run_prims, outputs=[prim_table, prim_e4_md])
	            demo.load(_run_prims, outputs=[prim_table, prim_e4_md])

	    # Footer: version / licence line and the same navigation links as the header.
	    gr.Markdown("""
	---

	crdt-merge v0.9.5 · Patent UK 2607132.4, GB2608127.3 · E4 Trust-Delta · BUSL-1.1 → Apache 2.0 (2028-03-29)

	[🏠 Flagship](https://huggingface.co/spaces/optitransfer/crdt-merge) · [🔬 Data Playground](https://huggingface.co/spaces/optitransfer/crdt-merge-data) · [🌐 Federation](https://huggingface.co/spaces/optitransfer/crdt-merge-federation) · [GitHub](https://github.com/mgillr/crdt-merge) · [⭐ Star Repo](https://github.com/mgillr/crdt-merge/stargazers) · [👁️ Watch](https://github.com/mgillr/crdt-merge/subscription) · [📐 Architecture Deep Dive](https://github.com/mgillr/crdt-merge/tree/main/docs/architecture) · [PyPI](https://pypi.org/project/crdt-merge/) · `pip install crdt-merge`
	""")

	# Start the Gradio server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()