# SPDX-License-Identifier: BUSL-1.1 # Copyright 2026 Ryan Gillespie / Optitransfer # # Licensed under the Business Source License 1.1 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://github.com/mgillr/crdt-merge/blob/main/LICENSE # # Change Date: 2028-03-29 # Change License: Apache License, Version 2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # On 2028-03-29 this file converts to Apache License, Version 2.0. """ crdt-merge v0.9.5 — Data Playground HuggingFace Space Tabular CRDT merge, conflict analysis, and core primitive demonstrations. """ import os import json import time import numpy as np import gradio as gr import plotly.graph_objects as go CSS = """ .gradio-container { background: #09090b !important; font-family: 'Inter', system-ui, sans-serif !important; } .gr-button-primary { background: linear-gradient(135deg, #2563eb, #1d4ed8) !important; border: none !important; color: #fff !important; font-weight: 600 !important; } footer { display: none !important; } .tab-nav button { color: #a1a1aa !important; font-size: 13px !important; letter-spacing: 0.05em !important; text-transform: uppercase !important; font-weight: 600 !important; padding: 10px 16px !important; } .tab-nav button.selected { color: #f4f4f5 !important; border-bottom: 2px solid #3b82f6 !important; } .tab-nav button:hover { color: #e4e4e7 !important; } code, .monospace { font-family: 'JetBrains Mono', ui-monospace, monospace !important; font-size: 13px !important; } h1, h2, h3 { color: #f4f4f5 !important; } p, li { color: #d4d4d8 !important; font-size: 15px !important; line-height: 1.7 !important; } label, .gr-input-label, .label-wrap span { color: #e4e4e7 !important; font-size: 14px !important; font-weight: 500 !important; } input, textarea, select, .gr-box { color: #f4f4f5 !important; background: #18181b !important; border-color: #3f3f46 !important; } .gr-dataframe th, table th { color: #f4f4f5 !important; background: #18181b !important; font-weight: 600 !important; font-size: 13px !important; } .gr-dataframe td, table td { color: #d4d4d8 !important; font-size: 13px !important; border-color: #27272a !important; } .gr-dataframe tr:hover td { background: #1e1e22 !important; } .gr-info, .info { color: #a1a1aa !important; font-size: 12px !important; } strong { color: #f4f4f5 !important; } """ PLOTLY_LAYOUT = dict( paper_bgcolor="#09090b", plot_bgcolor="#18181b", font=dict(color="#a1a1aa", family="Inter"), xaxis=dict(gridcolor="#27272a", linecolor="#27272a"), yaxis=dict(gridcolor="#27272a", linecolor="#27272a"), margin=dict(l=60, r=20, t=40, b=60), ) THEME = gr.themes.Base( primary_hue=gr.themes.colors.blue, neutral_hue=gr.themes.colors.zinc, ) NAV_MD = """**[🏠 Flagship](https://huggingface.co/spaces/optitransfer/crdt-merge) · [🔬 Data Playground](https://huggingface.co/spaces/optitransfer/crdt-merge-data) · [🌐 Federation](https://huggingface.co/spaces/optitransfer/crdt-merge-federation) · [GitHub ↗](https://github.com/mgillr/crdt-merge) · [⭐ Star Repo](https://github.com/mgillr/crdt-merge/stargazers) · [👁️ Watch](https://github.com/mgillr/crdt-merge/subscription) · [📐 Architecture Deep Dive](https://github.com/mgillr/crdt-merge/tree/main/docs/architecture) · [PyPI ↗](https://pypi.org/project/crdt-merge/)**""" HERO_MD = """ # crdt-merge — Data Playground Tabular CRDT merge for DataFrames and datasets. Conflict-free record merge, deduplication, and provenance tracking. `pip install crdt-merge` · [GitHub](https://github.com/mgillr/crdt-merge) · [PyPI](https://pypi.org/project/crdt-merge/) · Patent UK 2607132.4, GB2608127.3 · E4 Trust-Delta Architecture """ STRATEGIES_DF = ["LWW", "MaxWins", "MinWins", "Union"] # ----------------------------------------------------------------- # Data loading # ----------------------------------------------------------------- def _load_dataset_records(): """Try HF datasets first, fallback to synthetic.""" source = "synthetic" records_a = [] records_b = [] try: from datasets import load_dataset ds = load_dataset("glue", "sst2", split="train[:200]") all_records = [{"id": i, "sentence": ds[i]["sentence"], "label": ds[i]["label"], "_ts": i} for i in range(len(ds))] records_a = all_records[:150] # Node B: overlapping records (100-149) get modified values + later timestamps records_b = [] for r in all_records[100:]: rid = r["id"] if rid < 150: # overlapping region -- simulate a different node's edits records_b.append({ "id": rid, "sentence": r["sentence"].strip() + " [node-B edit]", "label": 1 - r["label"], # flip label to create real conflict "_ts": rid + 50, # later timestamp for LWW }) else: records_b.append(r) source = "glue/sst2 (HuggingFace datasets, 200 rows, 50 conflicting overlap)" except Exception: pass if not records_a: rng = np.random.RandomState(7) adjectives = ["good", "bad", "great", "poor", "excellent", "terrible", "fine", "awful"] nouns = ["film", "movie", "picture", "show", "performance", "script", "cast", "story"] for i in range(200): adj = adjectives[i % len(adjectives)] noun = nouns[i % len(nouns)] records_a.append({"id": i, "sentence": f"A {adj} {noun}.", "label": i % 2, "_ts": i}) for i in range(100, 200): adj = adjectives[(i + 3) % len(adjectives)] noun = nouns[(i + 2) % len(nouns)] records_b.append({"id": i, "sentence": f"An {adj} {noun}.", "label": (i + 1) % 2, "_ts": i + 50}) for i in range(200, 250): adj = adjectives[i % len(adjectives)] noun = nouns[i % len(nouns)] records_b.append({"id": i, "sentence": f"The {adj} {noun}.", "label": i % 2, "_ts": i}) source = "synthetic (SST-2 style, 150 + 100 records with 50 overlap)" return records_a, records_b, source # ----------------------------------------------------------------- # TAB 1 -- Dataset Merge # ----------------------------------------------------------------- def run_dataset_merge(strategy_name: str): from crdt_merge.dataframe import merge as df_merge from crdt_merge.strategies import MergeSchema, LWW, MaxWins, MinWins, UnionSet strategy_map = { "LWW": LWW(), "MaxWins": MaxWins(), "MinWins": MinWins(), "Union": UnionSet(), } schema = MergeSchema(default=strategy_map[strategy_name]) records_a, records_b, source = _load_dataset_records() t0 = time.perf_counter() try: merged = df_merge(records_a, records_b, key="id", schema=schema, timestamp_col="_ts") elapsed = (time.perf_counter() - t0) * 1000 # Verify commutativity merged_ba = df_merge(records_b, records_a, key="id", schema=schema, timestamp_col="_ts") ids_ab = sorted([r["id"] for r in merged]) ids_ba = sorted([r["id"] for r in merged_ba]) comm_pass = ids_ab == ids_ba summary_md = f""" **Dataset Merge Complete** | Metric | Value | |---|---| | Source | {source} | | Strategy | {strategy_name} | | Node A records | {len(records_a)} | | Node B records | {len(records_b)} | | Overlapping IDs | {len(set(r['id'] for r in records_a) & set(r['id'] for r in records_b))} | | Merged records | {len(merged)} | | Elapsed | {elapsed:.1f}ms | | Commutative (merge_AB == merge_BA) | **{"PASS" if comm_pass else "FAIL"}** | ### Understanding the Results - **Merged Records Table:** Shows the first 20 rows after merging Node A and Node B. For overlapping record IDs (where both nodes have the same row but different values), the selected strategy decides which value wins. - **Strategy Behavior:** - `LWW` (Last-Writer-Wins) — the record with the **later timestamp** (`_ts`) wins. This is the most common strategy in distributed databases. - `MaxWins` — for numeric fields, the **larger value** wins. For text, lexicographic max. - `MinWins` — the **smaller value** wins. Useful for minimum-bid auctions or earliest-deadline scenarios. - `Union` — keeps **all values** as a set (no data is lost, but deduplication may be needed downstream). - **Commutativity PASS** means `merge(A, B)` and `merge(B, A)` produce identical results — a core CRDT guarantee. This ensures any two replicas performing the merge get the same output regardless of order. """ # E4 Trust Layer -- trust scores and Merkle provenance for the merge e4_md = "" try: from crdt_merge.e4 import TypedTrustScore from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice from crdt_merge.e4.trust_bound_merkle import TrustBoundMerkle ids_a = set(r["id"] for r in records_a) ids_b = set(r["id"] for r in records_b) lattice_a = DeltaTrustLattice(peer_id="node_A") lattice_b = DeltaTrustLattice(peer_id="node_B") score_a_self = lattice_a.get_trust("node_A") score_b_self = lattice_b.get_trust("node_B") score_a_from_b = lattice_b.get_trust("node_A") score_b_from_a = lattice_a.get_trust("node_B") merkle = TrustBoundMerkle(trust_lattice=lattice_a) for r in merged: originator = "node_A" if r["id"] in ids_b and r["id"] not in ids_a: originator = "node_B" elif r["id"] in ids_b and r["id"] in ids_a: originator = "node_B" if r.get("_ts", 0) >= 150 else "node_A" merkle.insert_leaf( key=str(r["id"]), data=json.dumps(r, default=str).encode(), originator=originator, ) root_hash = merkle.recompute() e4_md = f""" --- ### E4 Trust Layer | Peer | Lattice | Overall Trust | Status | |------|---------|--------------|--------| | node_A | node_A (self) | {score_a_self.overall_trust():.3f} | {"Probationary" if score_a_self.overall_trust() <= 0.5 else "Trusted"} | | node_B | node_B (self) | {score_b_self.overall_trust():.3f} | {"Probationary" if score_b_self.overall_trust() <= 0.5 else "Trusted"} | | node_A | node_B (cross) | {score_a_from_b.overall_trust():.3f} | {"Probationary" if score_a_from_b.overall_trust() <= 0.5 else "Trusted"} | | node_B | node_A (cross) | {score_b_from_a.overall_trust():.3f} | {"Probationary" if score_b_from_a.overall_trust() <= 0.5 else "Trusted"} | **Merkle Provenance Root:** `{root_hash}` **Merged records in Merkle tree:** {len(merged)} **Trust scoring:** All merge participants start at probationary (0.5) trust. Trust accrues over time via successful merges and evidence accumulation. """ except Exception as e: e4_md = f"\n\n---\n### E4 Trust Layer\n\nE4 trust module unavailable: {e}\n" summary_md = summary_md + e4_md display_rows = merged[:20] return display_rows, summary_md except Exception as e: return [], f"Error: {e}" # ----------------------------------------------------------------- # TAB 2 -- Conflict Analysis # ----------------------------------------------------------------- def run_conflict_analysis(): from crdt_merge.dataframe import merge as df_merge from crdt_merge.strategies import MergeSchema, LWW, MaxWins, MinWins, UnionSet records_a, records_b, source = _load_dataset_records() overlap_ids = set(r["id"] for r in records_a) & set(r["id"] for r in records_b) strategy_map = { "LWW": LWW(), "MaxWins": MaxWins(), "MinWins": MinWins(), "Union": UnionSet(), } fields = ["sentence", "label"] results_by_strategy = {} for strat_name, strat in strategy_map.items(): schema = MergeSchema(default=strat) try: merged = df_merge(records_a, records_b, key="id", schema=schema, timestamp_col="_ts") results_by_strategy[strat_name] = {r["id"]: r for r in merged if r["id"] in overlap_ids} except Exception as e: results_by_strategy[strat_name] = {} # Build conflict matrix: per-field, per-strategy-pair, how many records differ strat_names = list(strategy_map.keys()) conflict_matrix = {} for field in fields: conflict_matrix[field] = np.zeros((len(strat_names), len(strat_names)), dtype=np.float32) for i, s1 in enumerate(strat_names): for j, s2 in enumerate(strat_names): if i == j: continue diffs = 0 total = 0 for rid in overlap_ids: r1 = results_by_strategy[s1].get(rid) r2 = results_by_strategy[s2].get(rid) if r1 is not None and r2 is not None: total += 1 if str(r1.get(field, "")) != str(r2.get(field, "")): diffs += 1 conflict_matrix[field][i, j] = diffs / max(total, 1) # Heatmap: combine fields side by side combined_z = np.concatenate([conflict_matrix[f] for f in fields], axis=1) col_labels = [f"{f}:{s}" for f in fields for s in strat_names] fig = go.Figure(data=go.Heatmap( z=combined_z.tolist(), x=col_labels, y=strat_names, colorscale=[[0, "#18181b"], [1, "#3b82f6"]], showscale=True, colorbar=dict(title="Conflict Rate"), )) fig.update_layout( **PLOTLY_LAYOUT, title=f"Per-Field Conflict Matrix — Strategy vs Strategy (source: {source[:40]}...)", xaxis_title="Field : Strategy (column)", yaxis_title="Strategy (row)", ) # Summary table: how many overlapping records each strategy resolves differently from LWW summary_rows = [] for strat_name in strat_names: diffs_vs_lww = 0 for rid in overlap_ids: r_lww = results_by_strategy["LWW"].get(rid) r_s = results_by_strategy[strat_name].get(rid) if r_lww and r_s: for field in fields: if str(r_lww.get(field, "")) != str(r_s.get(field, "")): diffs_vs_lww += 1 break summary_rows.append({ "Strategy": strat_name, "Conflicts vs LWW": diffs_vs_lww, "Overlap Records": len(overlap_ids), "Conflict Rate": f"{diffs_vs_lww / max(len(overlap_ids), 1):.2%}", }) return summary_rows, fig def _e4_conflict_trust_analysis(): """Run E4 trust evidence analysis for detected conflicts. Returns markdown string.""" try: from crdt_merge.e4 import TypedTrustScore from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice from crdt_merge.e4.proof_evidence import TrustEvidence, EVIDENCE_TYPES records_a, records_b, source = _load_dataset_records() overlap_ids = set(r["id"] for r in records_a) & set(r["id"] for r in records_b) map_a = {r["id"]: r for r in records_a} map_b = {r["id"]: r for r in records_b} lattice = DeltaTrustLattice(peer_id="auditor") evidence_log = [] # Detect conflict types and fire evidence equivocation_count = 0 invalid_delta_count = 0 for rid in sorted(overlap_ids): ra = map_a.get(rid) rb = map_b.get(rid) if ra is None or rb is None: continue # Same key, different values = equivocation evidence if str(ra.get("sentence", "")) != str(rb.get("sentence", "")): ev = TrustEvidence.create( observer="auditor", target="node_B", evidence_type="equivocation", dimension="consistency", amount=-0.05, proof=f"id={rid} sentence diverged".encode(), ) evidence_log.append(("equivocation", "node_B", rid, "consistency")) equivocation_count += 1 # Label flip = invalid_delta evidence if ra.get("label") != rb.get("label"): ev = TrustEvidence.create( observer="auditor", target="node_B", evidence_type="invalid_delta", dimension="integrity", amount=-0.1, proof=f"id={rid} label flipped {ra.get('label')}->{rb.get('label')}".encode(), ) evidence_log.append(("invalid_delta", "node_B", rid, "integrity")) invalid_delta_count += 1 # Get trust scores after evidence score_a = lattice.get_trust("node_A") score_b = lattice.get_trust("node_B") # Build trust verdict table verdict_rows = [] for ev_type, target, rid, dim in evidence_log[:10]: verdict_rows.append(f"| {ev_type} | {target} | {rid} | {dim} |") if len(evidence_log) > 10: verdict_rows.append(f"| ... | ... | ... | ... |") verdict_rows.append(f"| *(total {len(evidence_log)} events)* | | | |") verdict_table = "\n".join(verdict_rows) md = f""" --- ### E4 Trust Layer -- Conflict Evidence **Evidence Events Fired:** {len(evidence_log)} total ({equivocation_count} equivocation, {invalid_delta_count} invalid_delta) | Evidence Type | Target | Record ID | Dimension | |--------------|--------|-----------|-----------| {verdict_table} **Post-Evidence Trust Scores:** | Peer | Overall Trust | Verdict | |------|--------------|---------| | node_A | {score_a.overall_trust():.3f} | {"Probationary" if score_a.overall_trust() <= 0.5 else "Trusted"} -- no negative evidence | | node_B | {score_b.overall_trust():.3f} | {"Probationary" if score_b.overall_trust() <= 0.5 else "Trusted"} -- {len(evidence_log)} evidence events filed | **Interpretation:** Conflicts between nodes degrade trust for the conflicting peer. The trust lattice records evidence so downstream consumers can make trust-aware merge decisions (e.g., reject merges from peers below a trust threshold). """ return md except Exception as e: return f"\n\n---\n### E4 Trust Layer -- Conflict Evidence\n\nE4 trust module unavailable: {e}\n" # ----------------------------------------------------------------- # TAB 3 -- Core CRDT Primitives # ----------------------------------------------------------------- def run_primitives_demo(): from crdt_merge.core import GCounter, PNCounter, LWWRegister, ORSet results = {} # GCounter gc_a = GCounter() gc_a.increment("node_A", 5) gc_a.increment("node_A", 3) gc_b = GCounter() gc_b.increment("node_B", 7) gc_merged_ab = gc_a.merge(gc_b) gc_merged_ba = gc_b.merge(gc_a) results["GCounter"] = { "node_A_ops": "gc_a.increment('node_A', 5); gc_a.increment('node_A', 3) # value=8", "node_B_ops": "gc_b.increment('node_B', 7) # value=7", "merge_AB_value": gc_merged_ab.value, "merge_BA_value": gc_merged_ba.value, "commutative": gc_merged_ab.value == gc_merged_ba.value, } # PNCounter pn_a = PNCounter() pn_a.increment("n", 10) pn_a.decrement("n", 3) pn_b = PNCounter() pn_b.increment("n", 5) pn_merged_ab = pn_a.merge(pn_b) pn_merged_ba = pn_b.merge(pn_a) results["PNCounter"] = { "node_A_ops": "pn_a.increment('n', 10); pn_a.decrement('n', 3) # value=7", "node_B_ops": "pn_b.increment('n', 5) # value=5", "merge_AB_value": pn_merged_ab.value, "merge_BA_value": pn_merged_ba.value, "commutative": pn_merged_ab.value == pn_merged_ba.value, } # LWWRegister lww_a = LWWRegister() lww_a.set("model_v1", timestamp=1.0) lww_a.set("model_v2", timestamp=3.0) lww_b = LWWRegister() lww_b.set("model_v3", timestamp=2.0) lww_merged_ab = lww_a.merge(lww_b) lww_merged_ba = lww_b.merge(lww_a) results["LWWRegister"] = { "node_A_ops": "lww_a.set('model_v1', timestamp=1.0); lww_a.set('model_v2', timestamp=3.0)", "node_B_ops": "lww_b.set('model_v3', timestamp=2.0)", "merge_AB_value": str(lww_merged_ab.value), "merge_BA_value": str(lww_merged_ba.value), "commutative": str(lww_merged_ab.value) == str(lww_merged_ba.value), } # ORSet orset_a = ORSet() orset_a.add("alpha") orset_a.add("beta") tag_beta = orset_a.add("gamma") orset_b = ORSet() orset_b.add("beta") orset_b.add("delta") orset_merged_ab = orset_a.merge(orset_b) orset_merged_ba = orset_b.merge(orset_a) results["ORSet"] = { "node_A_ops": "orset_a.add('alpha'); orset_a.add('beta'); orset_a.add('gamma')", "node_B_ops": "orset_b.add('beta'); orset_b.add('delta')", "merge_AB_value": str(sorted(orset_merged_ab.value)), "merge_BA_value": str(sorted(orset_merged_ba.value)), "commutative": sorted(orset_merged_ab.value) == sorted(orset_merged_ba.value), } rows = [] for name, data in results.items(): rows.append({ "Primitive": name, "Node A Operations": data["node_A_ops"], "Node B Operations": data["node_B_ops"], "merge(A,B) Value": str(data["merge_AB_value"]), "merge(B,A) Value": str(data["merge_BA_value"]), "Commutative": "PASS" if data["commutative"] else "FAIL", }) return rows def _e4_primitives_trust(): """Run E4 trust primitives alongside core CRDTs. Returns markdown string.""" try: from crdt_merge.e4 import TypedTrustScore, FrozenDict from crdt_merge.e4.delta_trust_lattice import DeltaTrustLattice from crdt_merge.e4.trust_bound_merkle import TrustBoundMerkle from crdt_merge.e4.causal_trust_clock import CausalTrustClock from crdt_merge.e4.pco import AggregateProofCarryingOperation # CausalTrustClock demo clock_a = CausalTrustClock(peer_id="node_A") clock_b = CausalTrustClock(peer_id="node_B") # Simulate operations on each clock clock_a = clock_a.increment() # op 1 clock_a = clock_a.increment() # op 2 clock_a = clock_a.increment() # op 3 clock_b = clock_b.increment() # op 1 clock_b = clock_b.increment() # op 2 clock_a_time = clock_a.logical_time clock_b_time = clock_b.logical_time # Merge clocks clock_merged = clock_a.merge(clock_b) clock_merged_time = clock_merged.logical_time # Trust-Bound Merkle tree wrapping primitive operations lattice = DeltaTrustLattice(peer_id="node_A") merkle = TrustBoundMerkle(trust_lattice=lattice) ops = [ ("gcounter_inc_A", b"increment(node_A, 5)", "node_A"), ("gcounter_inc_B", b"increment(node_B, 7)", "node_B"), ("pncounter_inc", b"increment(n, 10)", "node_A"), ("pncounter_dec", b"decrement(n, 3)", "node_A"), ("lww_set_v1", b"set(model_v1, ts=1.0)", "node_A"), ("lww_set_v3", b"set(model_v3, ts=2.0)", "node_B"), ("orset_add_alpha", b"add(alpha)", "node_A"), ("orset_add_delta", b"add(delta)", "node_B"), ] for key, data, orig in ops: merkle.insert_leaf(key=key, data=data, originator=orig) merkle_root = merkle.recompute() # PCO wire format pco = AggregateProofCarryingOperation( aggregate_hash=b'\x00' * 32, signature=b'\x00' * 64, originator_id="node_A", metadata=b'{"ops": 8}', merkle_root_at_creation=str(merkle_root), clock_snapshot=b'\x03', trust_vector_hash="tvh_demo", delta_bounds=(), ) wire = pco.to_wire() wire_size = len(wire) md = f""" --- ### E4 Trust Layer -- Primitive-Level Trust #### CausalTrustClock | Clock | Operations | Logical Time | |-------|-----------|-------------| | node_A | 3 increments | {clock_a_time} | | node_B | 2 increments | {clock_b_time} | | merged(A, B) | merge | {clock_merged_time} | Causal trust clocks are immutable -- each `increment()` returns a new clock instance. The merged clock captures the causal frontier of both peers. #### Trust-Bound Merkle Tree | Property | Value | |----------|-------| | Leaves inserted | {len(ops)} | | Operations covered | GCounter, PNCounter, LWWRegister, ORSet | | Merkle root | `{merkle_root}` | Every CRDT operation is recorded as a Merkle leaf with its originator. The trust-bound Merkle tree links each leaf to the originator's trust score in the lattice, enabling per-operation provenance auditing. #### Proof-Carrying Operation (PCO) Wire Format | Property | Value | |----------|-------| | Wire size | {wire_size} bytes | | Originator | node_A | | Merkle root at creation | `{str(merkle_root)[:32]}...` | | Format | AggregateProofCarryingOperation | The PCO bundles a cryptographic proof (aggregate hash + signature), the Merkle root at time of creation, and a clock snapshot into a compact wire format suitable for gossip protocols. """ return md except Exception as e: return f"\n\n---\n### E4 Trust Layer -- Primitive-Level Trust\n\nE4 trust module unavailable: {e}\n" # ----------------------------------------------------------------- # Gradio UI # ----------------------------------------------------------------- with gr.Blocks(theme=THEME, css=CSS, title="crdt-merge — Data Playground") as demo: gr.Markdown(NAV_MD) gr.Markdown(HERO_MD) with gr.Tabs(): # -- TAB 1 -------------------------------------------------------- with gr.Tab("Dataset Merge"): gr.Markdown(""" ## Dataset Merge Loads glue/sst2 from HuggingFace datasets (first 200 rows) or uses synthetic fallback. Splits into two node partitions with 50 overlapping records. Demonstrates conflict-free merge with configurable strategy. > **E4 Trust Scoring Active (v0.9.5+):** All merge operations now carry typed trust scores by default. Every record merge accumulates accuracy, consistency, recency, and provenance trust dimensions via GCounter-backed convergent accumulators. Trust propagation adds zero API overhead -- it activates transparently on `import crdt_merge`. """) with gr.Row(): strat_dd = gr.Dropdown( choices=STRATEGIES_DF, value="LWW", label="Merge Strategy", info="LWW = Last Write Wins (by timestamp). MaxWins/MinWins = field max/min. Union = set union.", ) merge_ds_btn = gr.Button("Run Dataset Merge", variant="primary") merge_summary_md = gr.Markdown() merge_result_table = gr.Dataframe( headers=["id", "sentence", "label", "_ts"], label="Merged Records (first 20 rows)", wrap=True, ) def _run_ds_merge(strategy): rows, summary = run_dataset_merge(strategy) df_data = [[r.get("id", ""), r.get("sentence", ""), r.get("label", ""), r.get("_ts", "")] for r in rows] return summary, df_data merge_ds_btn.click(_run_ds_merge, inputs=[strat_dd], outputs=[merge_summary_md, merge_result_table]) demo.load(lambda: _run_ds_merge("LWW"), outputs=[merge_summary_md, merge_result_table]) # -- TAB 2 -------------------------------------------------------- with gr.Tab("Conflict Analysis"): gr.Markdown(""" ## Conflict Analysis Runs the same dataset through all 4 strategies and computes per-field conflict rates between strategy pairs. The heatmap shows how often two strategies disagree on a record. ### How to Read the Results - **Conflict Rate Heatmap:** Each cell shows the fraction of overlapping records where two strategies produce **different values** for a given field. Brighter = more disagreement. The diagonal is always 0 (a strategy agrees with itself). - `sentence:LWW` vs `sentence:MaxWins` = "how often do LWW and MaxWins disagree on the sentence field?" - High conflict rates between strategies mean the choice of strategy materially affects the merged output. - **Comparison Table:** Shows how each strategy differs from LWW (the baseline). `0 conflicts` = identical behavior for this dataset. Higher numbers indicate the strategy resolves more records differently. - **Why this matters:** In production systems, teams need to understand which strategy is appropriate for their data. If all strategies agree, the choice doesn't matter. If they diverge significantly, the strategy selection is a critical design decision. """) with gr.Row(): conflict_btn = gr.Button("Run Conflict Analysis", variant="primary") conflict_chart = gr.Plot(label="Per-Field Conflict Matrix Heatmap") conflict_table = gr.Dataframe( headers=["Strategy", "Conflicts vs LWW", "Overlap Records", "Conflict Rate"], label="Strategy Comparison", ) conflict_e4_md = gr.Markdown() def _run_conflict(): rows, fig = run_conflict_analysis() df_data = [ [r["Strategy"], r["Conflicts vs LWW"], r["Overlap Records"], r["Conflict Rate"]] for r in rows ] e4_md = _e4_conflict_trust_analysis() return fig, df_data, e4_md conflict_btn.click(_run_conflict, outputs=[conflict_chart, conflict_table, conflict_e4_md]) demo.load(_run_conflict, outputs=[conflict_chart, conflict_table, conflict_e4_md]) # -- TAB 3 -------------------------------------------------------- with gr.Tab("Core CRDT Primitives"): gr.Markdown(""" ## Core CRDT Primitives Live demonstration of GCounter, PNCounter, LWWRegister, and ORSet. Each primitive is operated on two nodes independently, then merged in both directions. Commutativity is verified: merge(A,B) must equal merge(B,A). ### How to Read the Results | Primitive | What It Does | Merge Semantics | |---|---|---| | **GCounter** | Grow-only counter | Each node's count is tracked separately. Merge takes the **max per node**, then sums. Node A=8 + Node B=7 → merged=15. | | **PNCounter** | Increment/decrement counter | Two internal GCounters (positive + negative). Merge takes max per node for each. Net value = positives − negatives. | | **LWWRegister** | Last-Writer-Wins register | Stores a single value + timestamp. Merge keeps the value with the **latest timestamp**. Node A writes "model_v2" at t=3.0 > Node B's t=2.0, so A wins. | | **ORSet** | Observed-Remove Set | Add/remove elements with unique tags. Merge is the **union** of all adds minus confirmed removes. Both nodes' elements appear in the merged set. | - **merge(A,B) = merge(B,A):** The "Commutative" column proves this. PASS means the primitive is safe for distributed use — merge order doesn't affect the result. - These are the building blocks that power crdt-merge's higher-level DataFrame and model merge operations. """) with gr.Row(): prim_btn = gr.Button("Run Primitives Demo", variant="primary") prim_table = gr.Dataframe( headers=["Primitive", "Node A Operations", "Node B Operations", "merge(A,B) Value", "merge(B,A) Value", "Commutative"], label="Primitive Commutativity Proof", wrap=True, ) prim_e4_md = gr.Markdown() def _run_prims(): rows = run_primitives_demo() table_data = [ [r["Primitive"], r["Node A Operations"], r["Node B Operations"], r["merge(A,B) Value"], r["merge(B,A) Value"], r["Commutative"]] for r in rows ] e4_md = _e4_primitives_trust() return table_data, e4_md prim_btn.click(_run_prims, outputs=[prim_table, prim_e4_md]) demo.load(_run_prims, outputs=[prim_table, prim_e4_md]) gr.Markdown(""" --- **crdt-merge v0.9.5** · Patent UK 2607132.4, GB2608127.3 · E4 Trust-Delta · BUSL-1.1 → Apache 2.0 (2028-03-29) [🏠 Flagship](https://huggingface.co/spaces/optitransfer/crdt-merge) · [🔬 Data Playground](https://huggingface.co/spaces/optitransfer/crdt-merge-data) · [🌐 Federation](https://huggingface.co/spaces/optitransfer/crdt-merge-federation) · [GitHub](https://github.com/mgillr/crdt-merge) · [⭐ Star Repo](https://github.com/mgillr/crdt-merge/stargazers) · [👁️ Watch](https://github.com/mgillr/crdt-merge/subscription) · [📐 Architecture Deep Dive](https://github.com/mgillr/crdt-merge/tree/main/docs/architecture) · [PyPI](https://pypi.org/project/crdt-merge/) · `pip install crdt-merge` """) if __name__ == "__main__": demo.launch()