MetaCortex-Dynamics commited on
Commit
70c4b1b
Β·
verified Β·
1 Parent(s): 9ba436b

Create pipeline/mdlm/governed_pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline/mdlm/governed_pipeline.py +326 -0
pipeline/mdlm/governed_pipeline.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Governed Generation Pipeline β€” Full 4-Phase End-to-End
3
+
4
+ PROPOSE (MDLM) β†’ DECIDE (G₁-G₇) β†’ PROMOTE (witness commitment) β†’ EXECUTE (output)
5
+
6
+ This is the GGP working as designed. Four phases, architecturally separated.
7
+ PROPOSE β‰  DECIDE β‰  PROMOTE β‰  EXECUTE enforced by module boundaries.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import time
14
+ from dataclasses import dataclass, field, asdict
15
+ from datetime import datetime, timezone
16
+ from hashlib import sha256
17
+ from typing import Optional
18
+
19
+ try:
20
+ import torch
21
+ HAS_TORCH = True
22
+ except ImportError:
23
+ HAS_TORCH = False
24
+
25
+ from pipeline.types import Op, Witness, FrameExample, WitnessBundle, WitnessAttestation
26
+ from pipeline.mdlm.tokenizer import (
27
+ decode, TOKEN_NAMES, OP_OFFSET, WIT_OFFSET, ATTESTED, WITHHELD,
28
+ G_OPEN, G_CLOSE, S_OPEN, S_CLOSE, F_OPEN, F_CLOSE, BOS, EOS, PAD, MASK,
29
+ )
30
+ from pipeline.stages.s4_validate import (
31
+ validate_and_score, TigStatus, Verdict, AdmissibilityResult,
32
+ )
33
+
34
+
35
+ # ═══════════════════════════════════════════════════════════════════════════════
36
+ # PHASE 1: PROPOSE β€” MDLM crystallizes candidate governed structures
37
+ # ═══════════════════════════════════════════════════════════════════════════════
38
+
39
+ @dataclass
40
+ class Candidate:
41
+ """A candidate governed structure crystallized by PROPOSE."""
42
+ tokens: list[int]
43
+ decoded: str
44
+ proposed_at: str = ""
45
+
46
+
47
+ def propose(model, num_candidates: int = 10, seq_len: int = 40,
48
+ g_slots: int = 3, s_slots: int = 4, f_slots: int = 3) -> list[Candidate]:
49
+ """PROPOSE phase: MDLM crystallizes candidate governed structures from noise."""
50
+ from pipeline.mdlm.model import MaskingSchedule, generate
51
+
52
+ samples = generate(model, num_candidates, seq_len, MaskingSchedule.HIERARCHICAL, 20,
53
+ g_slots=g_slots, s_slots=s_slots, f_slots=f_slots)
54
+
55
+ candidates = []
56
+ for i in range(num_candidates):
57
+ tokens = samples[i].tolist()
58
+ candidates.append(Candidate(
59
+ tokens=tokens,
60
+ decoded=decode(tokens),
61
+ proposed_at=datetime.now(timezone.utc).isoformat(),
62
+ ))
63
+ return candidates
64
+
65
+
66
+ # ═══════════════════════════════════════════════════════════════════════════════
67
+ # PHASE 2: DECIDE β€” G₁-G₇ admissibility gates evaluate candidates
68
+ # ═══════════════════════════════════════════════════════════════════════════════
69
+
70
+ @dataclass
71
+ class Decision:
72
+ """Gate decision on a candidate."""
73
+ candidate_index: int
74
+ tig_status: str # T, U, F
75
+ verdict: str
76
+ rejected_at: Optional[str] = None
77
+ viki_patterns: list[str] = field(default_factory=list)
78
+ decided_at: str = ""
79
+
80
+
81
+ def tokens_to_example(tokens: list[int]) -> Optional[FrameExample]:
82
+ """Convert generated token sequence back to FrameExample for validation."""
83
+ from pipeline.types import (
84
+ ModalityGrounding, OperatorSequence, OperatorExpression,
85
+ SourceProvenance, Tier,
86
+ )
87
+
88
+ g_ops, s_ops, f_ops = [], [], []
89
+ witnesses_raw = {}
90
+ current_mod = None
91
+
92
+ i = 0
93
+ while i < len(tokens):
94
+ t = tokens[i]
95
+ if t == G_OPEN: current_mod = "G"
96
+ elif t == S_OPEN: current_mod = "S"
97
+ elif t == F_OPEN: current_mod = "F"
98
+ elif t in (G_CLOSE, S_CLOSE, F_CLOSE): current_mod = None
99
+ elif OP_OFFSET <= t < OP_OFFSET + 15 and current_mod:
100
+ op_name = TOKEN_NAMES[t]
101
+ target = {"G": g_ops, "S": s_ops, "F": f_ops}.get(current_mod)
102
+ if target is not None:
103
+ target.append({"operator": op_name, "evidence": f"generated({op_name})"})
104
+ elif WIT_OFFSET <= t < WIT_OFFSET + 7:
105
+ wit_name = TOKEN_NAMES[t]
106
+ if i + 1 < len(tokens):
107
+ witnesses_raw[wit_name] = {"attested": tokens[i + 1] == ATTESTED, "evidence": f"generated({wit_name})"}
108
+ i += 1
109
+ i += 1
110
+
111
+ if not g_ops or not s_ops or not f_ops:
112
+ return None
113
+
114
+ example = FrameExample(
115
+ provenance=SourceProvenance(
116
+ source_id="mdlm:generated", tier=Tier.T1, url="mdlm",
117
+ commit_or_version="variant_A", license="generated",
118
+ acquired_at="2026-03-28", artifact_sha256="generated"),
119
+ channel_a=ModalityGrounding(modality="G", operators=OperatorSequence(
120
+ expressions=[OperatorExpression(operator=Op.from_name(op["operator"]), evidence=op["evidence"])
121
+ for op in g_ops if Op.from_name(op["operator"]) is not None])),
122
+ channel_b=ModalityGrounding(modality="S", operators=OperatorSequence(
123
+ expressions=[OperatorExpression(operator=Op.from_name(op["operator"]), evidence=op["evidence"])
124
+ for op in s_ops if Op.from_name(op["operator"]) is not None])),
125
+ channel_c=ModalityGrounding(modality="F", operators=OperatorSequence(
126
+ expressions=[OperatorExpression(operator=Op.from_name(op["operator"]), evidence=op["evidence"])
127
+ for op in f_ops if Op.from_name(op["operator"]) is not None])),
128
+ witnesses=WitnessBundle(),
129
+ )
130
+ for w in Witness:
131
+ wd = witnesses_raw.get(w.canonical_name, {})
132
+ example.witnesses.attestations[w] = WitnessAttestation(
133
+ witness=w, attested=wd.get("attested", False), evidence=wd.get("evidence", ""))
134
+ example.content_hash = example.compute_hash()
135
+ return example
136
+
137
+
138
+ def decide(candidates: list[Candidate]) -> list[tuple[Candidate, Decision, Optional[FrameExample]]]:
139
+ """DECIDE phase: Run each candidate through G₁-G₇ admissibility gates."""
140
+
141
+ results = []
142
+ for i, candidate in enumerate(candidates):
143
+ example = tokens_to_example(candidate.tokens)
144
+
145
+ if example is None:
146
+ decision = Decision(
147
+ candidate_index=i,
148
+ tig_status="F",
149
+ verdict="FAIL",
150
+ rejected_at="PARSE",
151
+ decided_at=datetime.now(timezone.utc).isoformat(),
152
+ )
153
+ results.append((candidate, decision, None))
154
+ continue
155
+
156
+ admissibility = validate_and_score(example)
157
+
158
+ decision = Decision(
159
+ candidate_index=i,
160
+ tig_status=admissibility.tig_status.value,
161
+ verdict=admissibility.verdict.value,
162
+ rejected_at=admissibility.rejected_at,
163
+ viki_patterns=[vp.pattern_type for vp in admissibility.viki_patterns],
164
+ decided_at=datetime.now(timezone.utc).isoformat(),
165
+ )
166
+ results.append((candidate, decision, example if admissibility.tig_status == TigStatus.TRUE else None))
167
+
168
+ return results
169
+
170
+
171
+ # ═══════════════════════════════════════════════════════════════════════════════
172
+ # PHASE 3: PROMOTE β€” Witness commitment (irrevocable)
173
+ # ═══════════════════════════════════════════════════════════════════════════════
174
+
175
+ @dataclass
176
+ class WitnessCommitment:
177
+ """Irrevocable witness commitment for a promoted governed structure."""
178
+ content_hash: str
179
+ witness_bundle_hash: str
180
+ witnesses: dict # witness_name β†’ {attested, evidence}
181
+ promoted_at: str = ""
182
+ committed: bool = False
183
+
184
+
185
+ def promote(admitted: list[tuple[Candidate, Decision, FrameExample]]) -> list[tuple[FrameExample, WitnessCommitment]]:
186
+ """PROMOTE phase: 7-witness attestation with cryptographic commitment.
187
+
188
+ Only T-status candidates from DECIDE enter PROMOTE.
189
+ Unanimity required: all 7 witnesses must attest. Any withholding blocks promotion.
190
+ """
191
+ promoted = []
192
+
193
+ for candidate, decision, example in admitted:
194
+ if example is None or decision.tig_status != "T":
195
+ continue
196
+
197
+ # Verify witness unanimity
198
+ if not example.witnesses.is_unanimous():
199
+ continue # Block promotion β€” witness withholding
200
+
201
+ # Build commitment
202
+ bundle_data = json.dumps({
203
+ w.canonical_name: {
204
+ "attested": a.attested,
205
+ "evidence": a.evidence,
206
+ }
207
+ for w, a in example.witnesses.attestations.items()
208
+ }, sort_keys=True)
209
+ bundle_hash = sha256(bundle_data.encode()).hexdigest()
210
+
211
+ commitment = WitnessCommitment(
212
+ content_hash=example.content_hash,
213
+ witness_bundle_hash=bundle_hash,
214
+ witnesses={
215
+ w.canonical_name: {"attested": a.attested, "evidence": a.evidence}
216
+ for w, a in example.witnesses.attestations.items()
217
+ },
218
+ promoted_at=datetime.now(timezone.utc).isoformat(),
219
+ committed=True,
220
+ )
221
+ promoted.append((example, commitment))
222
+
223
+ return promoted
224
+
225
+
226
+ # ═══════════════════════════════════════════════════════════════════════════════
227
+ # PHASE 4: EXECUTE β€” Output within committed envelope
228
+ # ═══════════════════════════════════════════════════════════════════════════════
229
+
230
+ @dataclass
231
+ class GovernedOutput:
232
+ """Final governed output β€” channel_bly witnessed, committed, traceable."""
233
+ content_hash: str
234
+ gov_structure: dict # G, S, F operator compositions
235
+ witness_commitment: dict
236
+ provenance: dict
237
+ generated_at: str = ""
238
+
239
+
240
+ def execute(promoted: list[tuple[FrameExample, WitnessCommitment]]) -> list[GovernedOutput]:
241
+ """EXECUTE phase: Produce governed output within committed validity envelope.
242
+
243
+ Each output carries its full governance trace:
244
+ - governed structure (what was crystallized)
245
+ - Witness commitment (who attested)
246
+ - Provenance (where it came from)
247
+ """
248
+ outputs = []
249
+
250
+ for example, commitment in promoted:
251
+ output = GovernedOutput(
252
+ content_hash=commitment.content_hash,
253
+ gov_structure={
254
+ "G": [{"operator": e.operator.canonical_name, "evidence": e.evidence}
255
+ for e in example.channel_a.operators.expressions],
256
+ "S": [{"operator": e.operator.canonical_name, "evidence": e.evidence}
257
+ for e in example.channel_b.operators.expressions],
258
+ "F": [{"operator": e.operator.canonical_name, "evidence": e.evidence}
259
+ for e in example.channel_c.operators.expressions],
260
+ },
261
+ witness_commitment=asdict(commitment),
262
+ provenance=asdict(example.provenance),
263
+ generated_at=datetime.now(timezone.utc).isoformat(),
264
+ )
265
+ outputs.append(output)
266
+
267
+ return outputs
268
+
269
+
270
+ # ═══════════════════════════════════════════════════════════════════════════════
271
+ # FULL PIPELINE
272
+ # ═══════════════════════════════════════════════════════════════════════════════
273
+
274
+ @dataclass
275
+ class PipelineReport:
276
+ """Complete report from one pipeline run."""
277
+ proposed: int
278
+ decided_t: int
279
+ decided_u: int
280
+ decided_f: int
281
+ promoted: int
282
+ executed: int
283
+ viki_detections: int
284
+ elapsed_seconds: float
285
+ outputs: list[dict]
286
+
287
+
288
+ def run_governed_pipeline(model, num_candidates: int = 100,
289
+ g_slots: int = 3, s_slots: int = 4, f_slots: int = 3) -> PipelineReport:
290
+ """Run the full 4-phase governed generation pipeline.
291
+
292
+ PROPOSE β†’ DECIDE β†’ PROMOTE β†’ EXECUTE
293
+ """
294
+ start = time.time()
295
+
296
+ # Phase 1: PROPOSE
297
+ candidates = propose(model, num_candidates, g_slots=g_slots, s_slots=s_slots, f_slots=f_slots)
298
+
299
+ # Phase 2: DECIDE
300
+ decided = decide(candidates)
301
+ t_count = sum(1 for _, d, _ in decided if d.tig_status == "T")
302
+ u_count = sum(1 for _, d, _ in decided if d.tig_status == "U")
303
+ f_count = sum(1 for _, d, _ in decided if d.tig_status == "F")
304
+ viki_count = sum(len(d.viki_patterns) for _, d, _ in decided)
305
+
306
+ admitted = [(c, d, e) for c, d, e in decided if d.tig_status == "T" and e is not None]
307
+
308
+ # Phase 3: PROMOTE
309
+ promoted = promote(admitted)
310
+
311
+ # Phase 4: EXECUTE
312
+ outputs = execute(promoted)
313
+
314
+ elapsed = time.time() - start
315
+
316
+ return PipelineReport(
317
+ proposed=num_candidates,
318
+ decided_t=t_count,
319
+ decided_u=u_count,
320
+ decided_f=f_count,
321
+ promoted=len(promoted),
322
+ executed=len(outputs),
323
+ viki_detections=viki_count,
324
+ elapsed_seconds=elapsed,
325
+ outputs=[asdict(o) for o in outputs],
326
+ )