# crdt-merge-data / app.py
# gille1983's picture
# Integrate E4 trust output into all demo tabs
# 8350470
# SPDX-License-Identifier: BUSL-1.1
# Copyright 2026 Ryan Gillespie / Optitransfer
#
# Licensed under the Business Source License 1.1 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://github.com/mgillr/crdt-merge/blob/main/LICENSE
#
# Change Date: 2028-03-29
# Change License: Apache License, Version 2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# On 2028-03-29 this file converts to Apache License, Version 2.0.
"""
crdt-merge v0.9.5 — Data Playground HuggingFace Space
Tabular CRDT merge, conflict analysis, and core primitive demonstrations.
"""
import os
import json
import time
import numpy as np
import gradio as gr
import plotly.graph_objects as go
# Stylesheet handed to ``gr.Blocks(css=...)``: dark page background, blue
# gradient primary buttons, uppercase tab labels, dark inputs and tables, and
# the default footer hidden. Every rule uses ``!important``.
CSS = """
.gradio-container { background: #09090b !important; font-family: 'Inter', system-ui, sans-serif !important; }
.gr-button-primary { background: linear-gradient(135deg, #2563eb, #1d4ed8) !important; border: none !important; color: #fff !important; font-weight: 600 !important; }
footer { display: none !important; }
.tab-nav button { color: #a1a1aa !important; font-size: 13px !important; letter-spacing: 0.05em !important; text-transform: uppercase !important; font-weight: 600 !important; padding: 10px 16px !important; }
.tab-nav button.selected { color: #f4f4f5 !important; border-bottom: 2px solid #3b82f6 !important; }
.tab-nav button:hover { color: #e4e4e7 !important; }
code, .monospace { font-family: 'JetBrains Mono', ui-monospace, monospace !important; font-size: 13px !important; }
h1, h2, h3 { color: #f4f4f5 !important; }
p, li { color: #d4d4d8 !important; font-size: 15px !important; line-height: 1.7 !important; }
label, .gr-input-label, .label-wrap span { color: #e4e4e7 !important; font-size: 14px !important; font-weight: 500 !important; }
input, textarea, select, .gr-box { color: #f4f4f5 !important; background: #18181b !important; border-color: #3f3f46 !important; }
.gr-dataframe th, table th { color: #f4f4f5 !important; background: #18181b !important; font-weight: 600 !important; font-size: 13px !important; }
.gr-dataframe td, table td { color: #d4d4d8 !important; font-size: 13px !important; border-color: #27272a !important; }
.gr-dataframe tr:hover td { background: #1e1e22 !important; }
.gr-info, .info { color: #a1a1aa !important; font-size: 12px !important; }
strong { color: #f4f4f5 !important; }
"""
# Layout keyword arguments unpacked into ``fig.update_layout`` so Plotly
# figures use the same dark colours as the page stylesheet.
PLOTLY_LAYOUT = {
    "paper_bgcolor": "#09090b",
    "plot_bgcolor": "#18181b",
    "font": {"color": "#a1a1aa", "family": "Inter"},
    "xaxis": {"gridcolor": "#27272a", "linecolor": "#27272a"},
    "yaxis": {"gridcolor": "#27272a", "linecolor": "#27272a"},
    "margin": {"l": 60, "r": 20, "t": 40, "b": 60},
}
# Gradio base theme with a blue primary hue and a zinc neutral hue.
THEME = gr.themes.Base(
primary_hue=gr.themes.colors.blue,
neutral_hue=gr.themes.colors.zinc,
)
# Markdown link bar rendered first in the app: sibling Spaces, GitHub, PyPI.
NAV_MD = """**[🏠 Flagship](https://huggingface.co/spaces/optitransfer/crdt-merge) · [🔬 Data Playground](https://huggingface.co/spaces/optitransfer/crdt-merge-data) · [🌐 Federation](https://huggingface.co/spaces/optitransfer/crdt-merge-federation) · [GitHub ↗](https://github.com/mgillr/crdt-merge) · [⭐ Star Repo](https://github.com/mgillr/crdt-merge/stargazers) · [👁️ Watch](https://github.com/mgillr/crdt-merge/subscription) · [📐 Architecture Deep Dive](https://github.com/mgillr/crdt-merge/tree/main/docs/architecture) · [PyPI ↗](https://pypi.org/project/crdt-merge/)**"""
# Page title and short description rendered directly below the link bar.
HERO_MD = """
# crdt-merge — Data Playground
Tabular CRDT merge for DataFrames and datasets. Conflict-free record merge, deduplication, and provenance tracking.
`pip install crdt-merge` · [GitHub](https://github.com/mgillr/crdt-merge) · [PyPI](https://pypi.org/project/crdt-merge/) · Patent UK 2607132.4, GB2608127.3 · E4 Trust-Delta Architecture
"""
# Choices for the "Merge Strategy" dropdown; each name is looked up in the
# ``strategy_map`` built inside ``run_dataset_merge``.
STRATEGIES_DF = ["LWW", "MaxWins", "MinWins", "Union"]
# -----------------------------------------------------------------
# Data loading
# -----------------------------------------------------------------
def _load_dataset_records():
"""Try HF datasets first, fallback to synthetic."""
source = "synthetic"
records_a = []
records_b = []
try:
from datasets import load_dataset
ds = load_dataset("glue", "sst2", split="train[:200]")
all_records = [{"id": i, "sentence": ds[i]["sentence"], "label": ds[i]["label"], "_ts": i}
for i in range(len(ds))]
records_a = all_records[:150]
# Node B: overlapping records (100-149) get modified values + later timestamps
records_b = []
for r in all_records[100:]:
rid = r["id"]
if rid < 150: # overlapping region -- simulate a different node's edits
records_b.append({
"id": rid,
"sentence": r["sentence"].strip() + " [node-B edit]",
"label": 1 - r["label"], # flip label to create real conflict
"_ts": rid + 50, # later timestamp for LWW
})
else:
records_b.append(r)
source = "glue/sst2 (HuggingFace datasets, 200 rows, 50 conflicting overlap)"
except Exception:
pass
if not records_a:
rng = np.random.RandomState(7)
adjectives = ["good", "bad", "great", "poor", "excellent", "terrible", "fine", "awful"]
nouns = ["film", "movie", "picture", "show", "performance", "script", "cast", "story"]
for i in range(200):
adj = adjectives[i % len(adjectives)]
noun = nouns[i % len(nouns)]
records_a.append({"id": i, "sentence": f"A {adj} {noun}.", "label": i % 2, "_ts": i})
for i in range(100, 200):
adj = adjectives[(i + 3) % len(adjectives)]
noun = nouns[(i + 2) % len(nouns)]
records_b.append({"id": i, "sentence": f"An {adj} {noun}.", "label": (i + 1) % 2, "_ts": i + 50})
for i in range(200, 250):
adj = adjectives[i % len(adjectives)]
noun = nouns[i % len(nouns)]
records_b.append({"id": i, "sentence": f"The {adj} {noun}.", "label": i % 2, "_ts": i})
source = "synthetic (SST-2 style, 150 + 100 records with 50 overlap)"
return records_a, records_b, source
# -----------------------------------------------------------------
# TAB 1 -- Dataset Merge
# -----------------------------------------------------------------
def _dataset_merge_trust_md(merged, records_a, records_b):
    """Render the "E4 Trust Layer" markdown section for a finished merge.

    Reads trust scores from two freshly created ``DeltaTrustLattice``
    instances and inserts every merged record into a ``TrustBoundMerkle``
    tree, tagged with the node the record is attributed to.

    Args:
        merged: Merged records (dicts carrying ``id`` and normally ``_ts``).
        records_a: Node A input records.
        records_b: Node B input records.

    Returns:
        str: Markdown to append to the merge summary. Any failure, including
        the E4 modules being unavailable, yields a short notice instead of
        raising.
    """
    try:
        from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice
        from crdt_merge.e4.trust_bound_merkle import TrustBoundMerkle

        ids_a = {r["id"] for r in records_a}
        ids_b = {r["id"] for r in records_b}
        ts_b = {r["id"]: r.get("_ts") for r in records_b}

        lattice_a = DeltaTrustLattice(peer_id="node_A")
        lattice_b = DeltaTrustLattice(peer_id="node_B")
        scores = [
            ("node_A", "node_A (self)", lattice_a.get_trust("node_A")),
            ("node_B", "node_B (self)", lattice_b.get_trust("node_B")),
            ("node_A", "node_B (cross)", lattice_b.get_trust("node_A")),
            ("node_B", "node_A (cross)", lattice_a.get_trust("node_B")),
        ]
        score_lines = []
        for peer, lattice_label, score in scores:
            overall = score.overall_trust()
            status = "Probationary" if overall <= 0.5 else "Trusted"
            score_lines.append(f"| {peer} | {lattice_label} | {overall:.3f} | {status} |")
        score_table = "\n".join(score_lines)

        merkle = TrustBoundMerkle(trust_lattice=lattice_a)
        for r in merged:
            rid = r["id"]
            if rid not in ids_b:
                originator = "node_A"
            elif rid not in ids_a:
                originator = "node_B"
            else:
                # Both nodes hold this id: attribute the record to node B
                # only when the merged timestamp is node B's own, instead of
                # relying on a fixed timestamp cut-off tied to one data set.
                originator = "node_B" if r.get("_ts") == ts_b[rid] else "node_A"
            merkle.insert_leaf(
                key=str(rid),
                data=json.dumps(r, default=str).encode(),
                originator=originator,
            )
        root_hash = merkle.recompute()

        # Blank lines around the table keep the following paragraphs from
        # being parsed as extra table rows.
        return f"""
---
### E4 Trust Layer

| Peer | Lattice | Overall Trust | Status |
|------|---------|--------------|--------|
{score_table}

**Merkle Provenance Root:** `{root_hash}`

**Merged records in Merkle tree:** {len(merged)}

**Trust scoring:** All merge participants start at probationary (0.5) trust. Trust accrues over time via successful merges and evidence accumulation.
"""
    except Exception as e:
        return f"\n\n---\n### E4 Trust Layer\n\nE4 trust module unavailable: {e}\n"


def run_dataset_merge(strategy_name: str):
    """Merge the two demo partitions with one strategy and summarise the run.

    Args:
        strategy_name: One of ``"LWW"``, ``"MaxWins"``, ``"MinWins"`` or
            ``"Union"``.

    Returns:
        tuple: ``(rows, summary_md)`` where ``rows`` is the first 20 merged
        records and ``summary_md`` is a markdown report (metrics table,
        explanation, E4 trust section). If the merge itself fails the result
        is ``([], "Error: ...")``.

    Raises:
        KeyError: If ``strategy_name`` is not one of the supported names.
        ImportError: If ``crdt_merge`` cannot be imported.
    """
    from crdt_merge.dataframe import merge as df_merge
    from crdt_merge.strategies import MergeSchema, LWW, MaxWins, MinWins, UnionSet

    strategy_map = {
        "LWW": LWW(),
        "MaxWins": MaxWins(),
        "MinWins": MinWins(),
        "Union": UnionSet(),
    }
    schema = MergeSchema(default=strategy_map[strategy_name])
    records_a, records_b, source = _load_dataset_records()
    overlap_count = len({r["id"] for r in records_a} & {r["id"] for r in records_b})

    t0 = time.perf_counter()
    try:
        merged = df_merge(records_a, records_b, key="id", schema=schema, timestamp_col="_ts")
        elapsed = (time.perf_counter() - t0) * 1000

        # NOTE(review): this check only compares the sets of record ids
        # produced by merge(A, B) and merge(B, A), not the field values.
        merged_ba = df_merge(records_b, records_a, key="id", schema=schema, timestamp_col="_ts")
        comm_pass = sorted(r["id"] for r in merged) == sorted(r["id"] for r in merged_ba)
        comm_label = "PASS" if comm_pass else "FAIL"

        summary_md = f"""
**Dataset Merge Complete**

| Metric | Value |
|---|---|
| Source | {source} |
| Strategy | {strategy_name} |
| Node A records | {len(records_a)} |
| Node B records | {len(records_b)} |
| Overlapping IDs | {overlap_count} |
| Merged records | {len(merged)} |
| Elapsed | {elapsed:.1f}ms |
| Commutative (merge_AB == merge_BA) | **{comm_label}** |

### Understanding the Results
- **Merged Records Table:** Shows the first 20 rows after merging Node A and Node B. For overlapping record IDs (where both nodes have the same row but different values), the selected strategy decides which value wins.
- **Strategy Behavior:**
- `LWW` (Last-Writer-Wins) — the record with the **later timestamp** (`_ts`) wins. This is the most common strategy in distributed databases.
- `MaxWins` — for numeric fields, the **larger value** wins. For text, lexicographic max.
- `MinWins` — the **smaller value** wins. Useful for minimum-bid auctions or earliest-deadline scenarios.
- `Union` — keeps **all values** as a set (no data is lost, but deduplication may be needed downstream).
- **Commutativity PASS** means `merge(A, B)` and `merge(B, A)` produce identical results — a core CRDT guarantee. This ensures any two replicas performing the merge get the same output regardless of order.
"""
        summary_md += _dataset_merge_trust_md(merged, records_a, records_b)
        return merged[:20], summary_md
    except Exception as e:
        return [], f"Error: {e}"
# -----------------------------------------------------------------
# TAB 2 -- Conflict Analysis
# -----------------------------------------------------------------
def run_conflict_analysis():
    """Compare how the four merge strategies resolve the overlapping records.

    Every strategy merges the same two partitions. For each compared field a
    strategy-by-strategy matrix holds the fraction of overlapping records on
    which two strategies produced different values; the matrices are placed
    side by side in one heatmap. A second result counts, per strategy, the
    overlapping records that differ from the LWW outcome in any field.

    Returns:
        tuple: ``(summary_rows, fig)`` with one dict per strategy (keys
        ``Strategy``, ``Conflicts vs LWW``, ``Overlap Records``,
        ``Conflict Rate``) and the Plotly heatmap figure.
    """
    from crdt_merge.dataframe import merge as df_merge
    from crdt_merge.strategies import MergeSchema, LWW, MaxWins, MinWins, UnionSet

    records_a, records_b, source = _load_dataset_records()
    shared_ids = {rec["id"] for rec in records_a} & {rec["id"] for rec in records_b}
    strategies = {
        "LWW": LWW(),
        "MaxWins": MaxWins(),
        "MinWins": MinWins(),
        "Union": UnionSet(),
    }
    compared_fields = ["sentence", "label"]

    # Merged overlap records per strategy, keyed by record id. A strategy
    # whose merge raises contributes an empty mapping.
    per_strategy = {}
    for label, strategy in strategies.items():
        schema = MergeSchema(default=strategy)
        try:
            rows = df_merge(records_a, records_b, key="id", schema=schema, timestamp_col="_ts")
            per_strategy[label] = {row["id"]: row for row in rows if row["id"] in shared_ids}
        except Exception:
            per_strategy[label] = {}

    def _differs(left, right, field):
        # Values are compared through their string form.
        return str(left.get(field, "")) != str(right.get(field, ""))

    names = list(strategies.keys())
    size = len(names)
    matrices = {}
    for field in compared_fields:
        grid = np.zeros((size, size), dtype=np.float32)
        for i, first in enumerate(names):
            for j, second in enumerate(names):
                if i != j:
                    compared = 0
                    differing = 0
                    for rid in shared_ids:
                        left = per_strategy[first].get(rid)
                        right = per_strategy[second].get(rid)
                        if left is None or right is None:
                            continue
                        compared += 1
                        if _differs(left, right, field):
                            differing += 1
                    grid[i, j] = differing / max(compared, 1)
        matrices[field] = grid

    # One heatmap: the per-field matrices laid out next to each other.
    stacked = np.concatenate([matrices[field] for field in compared_fields], axis=1)
    column_labels = []
    for field in compared_fields:
        for name in names:
            column_labels.append(f"{field}:{name}")

    heatmap = go.Heatmap(
        z=stacked.tolist(),
        x=column_labels,
        y=names,
        colorscale=[[0, "#18181b"], [1, "#3b82f6"]],
        showscale=True,
        colorbar=dict(title="Conflict Rate"),
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=f"Per-Field Conflict Matrix — Strategy vs Strategy (source: {source[:40]}...)",
        xaxis_title="Field : Strategy (column)",
        yaxis_title="Strategy (row)",
    )

    # Per strategy: overlapping records resolved differently from LWW.
    overlap_total = len(shared_ids)
    summary_rows = []
    for name in names:
        mismatches = 0
        for rid in shared_ids:
            baseline = per_strategy["LWW"].get(rid)
            candidate = per_strategy[name].get(rid)
            if not (baseline and candidate):
                continue
            if any(_differs(baseline, candidate, field) for field in compared_fields):
                mismatches += 1
        summary_rows.append({
            "Strategy": name,
            "Conflicts vs LWW": mismatches,
            "Overlap Records": overlap_total,
            "Conflict Rate": f"{mismatches / max(overlap_total, 1):.2%}",
        })
    return summary_rows, fig
def _e4_conflict_trust_analysis():
"""Run E4 trust evidence analysis for detected conflicts. Returns markdown string."""
try:
from crdt_merge.e4 import TypedTrustScore
from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice
from crdt_merge.e4.proof_evidence import TrustEvidence, EVIDENCE_TYPES
records_a, records_b, source = _load_dataset_records()
overlap_ids = set(r["id"] for r in records_a) & set(r["id"] for r in records_b)
map_a = {r["id"]: r for r in records_a}
map_b = {r["id"]: r for r in records_b}
lattice = DeltaTrustLattice(peer_id="auditor")
evidence_log = []
# Detect conflict types and fire evidence
equivocation_count = 0
invalid_delta_count = 0
for rid in sorted(overlap_ids):
ra = map_a.get(rid)
rb = map_b.get(rid)
if ra is None or rb is None:
continue
# Same key, different values = equivocation evidence
if str(ra.get("sentence", "")) != str(rb.get("sentence", "")):
ev = TrustEvidence.create(
observer="auditor",
target="node_B",
evidence_type="equivocation",
dimension="consistency",
amount=-0.05,
proof=f"id={rid} sentence diverged".encode(),
)
evidence_log.append(("equivocation", "node_B", rid, "consistency"))
equivocation_count += 1
# Label flip = invalid_delta evidence
if ra.get("label") != rb.get("label"):
ev = TrustEvidence.create(
observer="auditor",
target="node_B",
evidence_type="invalid_delta",
dimension="integrity",
amount=-0.1,
proof=f"id={rid} label flipped {ra.get('label')}->{rb.get('label')}".encode(),
)
evidence_log.append(("invalid_delta", "node_B", rid, "integrity"))
invalid_delta_count += 1
# Get trust scores after evidence
score_a = lattice.get_trust("node_A")
score_b = lattice.get_trust("node_B")
# Build trust verdict table
verdict_rows = []
for ev_type, target, rid, dim in evidence_log[:10]:
verdict_rows.append(f"| {ev_type} | {target} | {rid} | {dim} |")
if len(evidence_log) > 10:
verdict_rows.append(f"| ... | ... | ... | ... |")
verdict_rows.append(f"| *(total {len(evidence_log)} events)* | | | |")
verdict_table = "\n".join(verdict_rows)
md = f"""
---
### E4 Trust Layer -- Conflict Evidence
**Evidence Events Fired:** {len(evidence_log)} total ({equivocation_count} equivocation, {invalid_delta_count} invalid_delta)
| Evidence Type | Target | Record ID | Dimension |
|--------------|--------|-----------|-----------|
{verdict_table}
**Post-Evidence Trust Scores:**
| Peer | Overall Trust | Verdict |
|------|--------------|---------|
| node_A | {score_a.overall_trust():.3f} | {"Probationary" if score_a.overall_trust() <= 0.5 else "Trusted"} -- no negative evidence |
| node_B | {score_b.overall_trust():.3f} | {"Probationary" if score_b.overall_trust() <= 0.5 else "Trusted"} -- {len(evidence_log)} evidence events filed |
**Interpretation:** Conflicts between nodes degrade trust for the conflicting peer. The trust lattice records evidence so downstream consumers can make trust-aware merge decisions (e.g., reject merges from peers below a trust threshold).
"""
return md
except Exception as e:
return f"\n\n---\n### E4 Trust Layer -- Conflict Evidence\n\nE4 trust module unavailable: {e}\n"
# -----------------------------------------------------------------
# TAB 3 -- Core CRDT Primitives
# -----------------------------------------------------------------
def run_primitives_demo():
    """Exercise GCounter, PNCounter, LWWRegister and ORSet on two replicas.

    Each primitive is updated independently on a node-A and a node-B
    instance, merged in both directions, and the two merged values are
    compared.

    Returns:
        list[dict]: One row per primitive with the keys ``Primitive``,
        ``Node A Operations``, ``Node B Operations``, ``merge(A,B) Value``,
        ``merge(B,A) Value`` and ``Commutative`` (``"PASS"`` / ``"FAIL"``).
    """
    from crdt_merge.core import GCounter, PNCounter, LWWRegister, ORSet

    report = []

    def _record(primitive, ops_a, ops_b, value_ab, value_ba):
        # Values are displayed through ``str``; the PASS/FAIL verdict
        # compares them as passed in.
        report.append({
            "Primitive": primitive,
            "Node A Operations": ops_a,
            "Node B Operations": ops_b,
            "merge(A,B) Value": str(value_ab),
            "merge(B,A) Value": str(value_ba),
            "Commutative": "PASS" if value_ab == value_ba else "FAIL",
        })

    # GCounter
    counter_a = GCounter()
    counter_a.increment("node_A", 5)
    counter_a.increment("node_A", 3)
    counter_b = GCounter()
    counter_b.increment("node_B", 7)
    counter_ab = counter_a.merge(counter_b)
    counter_ba = counter_b.merge(counter_a)
    _record(
        "GCounter",
        "gc_a.increment('node_A', 5); gc_a.increment('node_A', 3) # value=8",
        "gc_b.increment('node_B', 7) # value=7",
        counter_ab.value,
        counter_ba.value,
    )

    # PNCounter
    signed_a = PNCounter()
    signed_a.increment("n", 10)
    signed_a.decrement("n", 3)
    signed_b = PNCounter()
    signed_b.increment("n", 5)
    signed_ab = signed_a.merge(signed_b)
    signed_ba = signed_b.merge(signed_a)
    _record(
        "PNCounter",
        "pn_a.increment('n', 10); pn_a.decrement('n', 3) # value=7",
        "pn_b.increment('n', 5) # value=5",
        signed_ab.value,
        signed_ba.value,
    )

    # LWWRegister: merged values are compared through their string form.
    register_a = LWWRegister()
    register_a.set("model_v1", timestamp=1.0)
    register_a.set("model_v2", timestamp=3.0)
    register_b = LWWRegister()
    register_b.set("model_v3", timestamp=2.0)
    register_ab = register_a.merge(register_b)
    register_ba = register_b.merge(register_a)
    _record(
        "LWWRegister",
        "lww_a.set('model_v1', timestamp=1.0); lww_a.set('model_v2', timestamp=3.0)",
        "lww_b.set('model_v3', timestamp=2.0)",
        str(register_ab.value),
        str(register_ba.value),
    )

    # ORSet: merged values are compared as sorted lists.
    set_a = ORSet()
    set_a.add("alpha")
    set_a.add("beta")
    set_a.add("gamma")
    set_b = ORSet()
    set_b.add("beta")
    set_b.add("delta")
    set_ab = set_a.merge(set_b)
    set_ba = set_b.merge(set_a)
    _record(
        "ORSet",
        "orset_a.add('alpha'); orset_a.add('beta'); orset_a.add('gamma')",
        "orset_b.add('beta'); orset_b.add('delta')",
        sorted(set_ab.value),
        sorted(set_ba.value),
    )

    return report
def _e4_primitives_trust():
"""Run E4 trust primitives alongside core CRDTs. Returns markdown string."""
try:
from crdt_merge.e4 import TypedTrustScore, FrozenDict
from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice
from crdt_merge.e4.trust_bound_merkle import TrustBoundMerkle
from crdt_merge.e4.causal_trust_clock import CausalTrustClock
from crdt_merge.e4.pco import AggregateProofCarryingOperation
# CausalTrustClock demo
clock_a = CausalTrustClock(peer_id="node_A")
clock_b = CausalTrustClock(peer_id="node_B")
# Simulate operations on each clock
clock_a = clock_a.increment() # op 1
clock_a = clock_a.increment() # op 2
clock_a = clock_a.increment() # op 3
clock_b = clock_b.increment() # op 1
clock_b = clock_b.increment() # op 2
clock_a_time = clock_a.logical_time
clock_b_time = clock_b.logical_time
# Merge clocks
clock_merged = clock_a.merge(clock_b)
clock_merged_time = clock_merged.logical_time
# Trust-Bound Merkle tree wrapping primitive operations
lattice = DeltaTrustLattice(peer_id="node_A")
merkle = TrustBoundMerkle(trust_lattice=lattice)
ops = [
("gcounter_inc_A", b"increment(node_A, 5)", "node_A"),
("gcounter_inc_B", b"increment(node_B, 7)", "node_B"),
("pncounter_inc", b"increment(n, 10)", "node_A"),
("pncounter_dec", b"decrement(n, 3)", "node_A"),
("lww_set_v1", b"set(model_v1, ts=1.0)", "node_A"),
("lww_set_v3", b"set(model_v3, ts=2.0)", "node_B"),
("orset_add_alpha", b"add(alpha)", "node_A"),
("orset_add_delta", b"add(delta)", "node_B"),
]
for key, data, orig in ops:
merkle.insert_leaf(key=key, data=data, originator=orig)
merkle_root = merkle.recompute()
# PCO wire format
pco = AggregateProofCarryingOperation(
aggregate_hash=b'\x00' * 32,
signature=b'\x00' * 64,
originator_id="node_A",
metadata=b'{"ops": 8}',
merkle_root_at_creation=str(merkle_root),
clock_snapshot=b'\x03',
trust_vector_hash="tvh_demo",
delta_bounds=(),
)
wire = pco.to_wire()
wire_size = len(wire)
md = f"""
---
### E4 Trust Layer -- Primitive-Level Trust
#### CausalTrustClock
| Clock | Operations | Logical Time |
|-------|-----------|-------------|
| node_A | 3 increments | {clock_a_time} |
| node_B | 2 increments | {clock_b_time} |
| merged(A, B) | merge | {clock_merged_time} |
Causal trust clocks are immutable -- each `increment()` returns a new clock instance. The merged clock captures the causal frontier of both peers.
#### Trust-Bound Merkle Tree
| Property | Value |
|----------|-------|
| Leaves inserted | {len(ops)} |
| Operations covered | GCounter, PNCounter, LWWRegister, ORSet |
| Merkle root | `{merkle_root}` |
Every CRDT operation is recorded as a Merkle leaf with its originator. The trust-bound Merkle tree links each leaf to the originator's trust score in the lattice, enabling per-operation provenance auditing.
#### Proof-Carrying Operation (PCO) Wire Format
| Property | Value |
|----------|-------|
| Wire size | {wire_size} bytes |
| Originator | node_A |
| Merkle root at creation | `{str(merkle_root)[:32]}...` |
| Format | AggregateProofCarryingOperation |
The PCO bundles a cryptographic proof (aggregate hash + signature), the Merkle root at time of creation, and a clock snapshot into a compact wire format suitable for gossip protocols.
"""
return md
except Exception as e:
return f"\n\n---\n### E4 Trust Layer -- Primitive-Level Trust\n\nE4 trust module unavailable: {e}\n"
# -----------------------------------------------------------------
# Gradio UI
# -----------------------------------------------------------------
# NOTE(review): the nesting below (which components sit inside each
# ``gr.Row`` / ``gr.Tab``) is reconstructed from context -- confirm against
# the intended layout.
with gr.Blocks(theme=THEME, css=CSS, title="crdt-merge — Data Playground") as demo:
    gr.Markdown(NAV_MD)
    gr.Markdown(HERO_MD)

    with gr.Tabs():
        # -- TAB 1: merge the two partitions with a chosen strategy ----------
        with gr.Tab("Dataset Merge"):
            gr.Markdown("""
## Dataset Merge
Loads glue/sst2 from HuggingFace datasets (first 200 rows) or uses synthetic fallback.
Splits into two node partitions with 50 overlapping records.
Demonstrates conflict-free merge with configurable strategy.
> **E4 Trust Scoring Active (v0.9.5+):** All merge operations now carry typed trust scores by default. Every record merge accumulates accuracy, consistency, recency, and provenance trust dimensions via GCounter-backed convergent accumulators. Trust propagation adds zero API overhead -- it activates transparently on `import crdt_merge`.
""")
            with gr.Row():
                strat_dd = gr.Dropdown(
                    choices=STRATEGIES_DF,
                    value="LWW",
                    label="Merge Strategy",
                    info="LWW = Last Write Wins (by timestamp). MaxWins/MinWins = field max/min. Union = set union.",
                )
                merge_ds_btn = gr.Button("Run Dataset Merge", variant="primary")
            merge_summary_md = gr.Markdown()
            merge_result_table = gr.Dataframe(
                headers=["id", "sentence", "label", "_ts"],
                label="Merged Records (first 20 rows)",
                wrap=True,
            )

            def _run_ds_merge(strategy):
                """Run the merge and reshape the rows for the result table."""
                rows, summary = run_dataset_merge(strategy)
                columns = ("id", "sentence", "label", "_ts")
                table = [[row.get(column, "") for column in columns] for row in rows]
                return summary, table

            merge_outputs = [merge_summary_md, merge_result_table]
            merge_ds_btn.click(_run_ds_merge, inputs=[strat_dd], outputs=merge_outputs)
            # Populate the tab with an LWW merge as soon as the page loads.
            demo.load(lambda: _run_ds_merge("LWW"), outputs=merge_outputs)

        # -- TAB 2: how often the strategies disagree -----------------------
        with gr.Tab("Conflict Analysis"):
            gr.Markdown("""
## Conflict Analysis
Runs the same dataset through all 4 strategies and computes per-field conflict rates
between strategy pairs. The heatmap shows how often two strategies disagree on a record.
### How to Read the Results
- **Conflict Rate Heatmap:** Each cell shows the fraction of overlapping records where two strategies produce **different values** for a given field. Brighter = more disagreement. The diagonal is always 0 (a strategy agrees with itself).
- `sentence:LWW` vs `sentence:MaxWins` = "how often do LWW and MaxWins disagree on the sentence field?"
- High conflict rates between strategies mean the choice of strategy materially affects the merged output.
- **Comparison Table:** Shows how each strategy differs from LWW (the baseline). `0 conflicts` = identical behavior for this dataset. Higher numbers indicate the strategy resolves more records differently.
- **Why this matters:** In production systems, teams need to understand which strategy is appropriate for their data. If all strategies agree, the choice doesn't matter. If they diverge significantly, the strategy selection is a critical design decision.
""")
            with gr.Row():
                conflict_btn = gr.Button("Run Conflict Analysis", variant="primary")
            conflict_chart = gr.Plot(label="Per-Field Conflict Matrix Heatmap")
            conflict_table = gr.Dataframe(
                headers=["Strategy", "Conflicts vs LWW", "Overlap Records", "Conflict Rate"],
                label="Strategy Comparison",
            )
            conflict_e4_md = gr.Markdown()

            def _run_conflict():
                """Run the analysis and return figure, table rows and E4 markdown."""
                rows, fig = run_conflict_analysis()
                columns = ("Strategy", "Conflicts vs LWW", "Overlap Records", "Conflict Rate")
                table = [[row[column] for column in columns] for row in rows]
                trust_md = _e4_conflict_trust_analysis()
                return fig, table, trust_md

            conflict_outputs = [conflict_chart, conflict_table, conflict_e4_md]
            conflict_btn.click(_run_conflict, outputs=conflict_outputs)
            demo.load(_run_conflict, outputs=conflict_outputs)

        # -- TAB 3: commutativity of the core primitives --------------------
        with gr.Tab("Core CRDT Primitives"):
            gr.Markdown("""
## Core CRDT Primitives
Live demonstration of GCounter, PNCounter, LWWRegister, and ORSet.
Each primitive is operated on two nodes independently, then merged in both directions.
Commutativity is verified: merge(A,B) must equal merge(B,A).
### How to Read the Results
| Primitive | What It Does | Merge Semantics |
|---|---|---|
| **GCounter** | Grow-only counter | Each node's count is tracked separately. Merge takes the **max per node**, then sums. Node A=8 + Node B=7 → merged=15. |
| **PNCounter** | Increment/decrement counter | Two internal GCounters (positive + negative). Merge takes max per node for each. Net value = positives − negatives. |
| **LWWRegister** | Last-Writer-Wins register | Stores a single value + timestamp. Merge keeps the value with the **latest timestamp**. Node A writes "model_v2" at t=3.0 > Node B's t=2.0, so A wins. |
| **ORSet** | Observed-Remove Set | Add/remove elements with unique tags. Merge is the **union** of all adds minus confirmed removes. Both nodes' elements appear in the merged set. |
- **merge(A,B) = merge(B,A):** The "Commutative" column proves this. PASS means the primitive is safe for distributed use — merge order doesn't affect the result.
- These are the building blocks that power crdt-merge's higher-level DataFrame and model merge operations.
""")
            with gr.Row():
                prim_btn = gr.Button("Run Primitives Demo", variant="primary")
            prim_table = gr.Dataframe(
                headers=["Primitive", "Node A Operations", "Node B Operations",
                         "merge(A,B) Value", "merge(B,A) Value", "Commutative"],
                label="Primitive Commutativity Proof",
                wrap=True,
            )
            prim_e4_md = gr.Markdown()

            def _run_prims():
                """Run the primitives demo and return table rows plus E4 markdown."""
                rows = run_primitives_demo()
                columns = (
                    "Primitive", "Node A Operations", "Node B Operations",
                    "merge(A,B) Value", "merge(B,A) Value", "Commutative",
                )
                table = [[row[column] for column in columns] for row in rows]
                trust_md = _e4_primitives_trust()
                return table, trust_md

            prim_outputs = [prim_table, prim_e4_md]
            prim_btn.click(_run_prims, outputs=prim_outputs)
            demo.load(_run_prims, outputs=prim_outputs)

    # Footer shown below the tabs.
    gr.Markdown("""
---
**crdt-merge v0.9.5** · Patent UK 2607132.4, GB2608127.3 · E4 Trust-Delta · BUSL-1.1 → Apache 2.0 (2028-03-29)
[🏠 Flagship](https://huggingface.co/spaces/optitransfer/crdt-merge) · [🔬 Data Playground](https://huggingface.co/spaces/optitransfer/crdt-merge-data) · [🌐 Federation](https://huggingface.co/spaces/optitransfer/crdt-merge-federation) · [GitHub](https://github.com/mgillr/crdt-merge) · [⭐ Star Repo](https://github.com/mgillr/crdt-merge/stargazers) · [👁️ Watch](https://github.com/mgillr/crdt-merge/subscription) · [📐 Architecture Deep Dive](https://github.com/mgillr/crdt-merge/tree/main/docs/architecture) · [PyPI](https://pypi.org/project/crdt-merge/) · `pip install crdt-merge`
""")

if __name__ == "__main__":
    demo.launch()