MetaCortex-Dynamics committed on
Commit
35a8c04
·
verified ·
1 Parent(s): d10d4e5

Create pipeline/stages/s4_validate.py

Browse files
Files changed (1) hide show
  1. pipeline/stages/s4_validate.py +312 -0
pipeline/stages/s4_validate.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ S4: VALIDATE + SCORE — Full G₁-G₇ Admissibility Gate Pipeline.
3
+
4
+ Mirrors kernel governance module::run_admissibility_gates().
5
+ The kernel is the authority; this is the pipeline's working copy.
6
+
7
+ Gates:
8
+ G₁-G₃: Channel B integrity (must-edge)
9
+ G₄: Authority separation / VIKI detection (must-edge)
10
+ G₅: Channel A continuity (may-edge)
11
+ G₆: Semantic stability (may-edge)
12
+ G₇: Behavioral prediction (may-edge)
13
+
14
+ TIG semantics:
15
+ F-status (must-edge violation) → FAIL, reject to PROPOSE
16
+ U-status (may-edge violation) → REPAIR, enter oracle queue
17
+ T-status (all pass) → score crystallinity → PASS or ORACLE_QUEUE
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import math
23
+ import re
24
+ from dataclasses import dataclass, field
25
+ from enum import Enum
26
+
27
+ from pipeline.types import Op, FrameExample, Witness
28
+
29
+
30
class TigStatus(str, Enum):
    """Three-valued gate status attached to every admissibility result.

    Members are ``str`` subclasses, so they compare equal to their one-letter
    codes ("T", "U", "F").
    """

    TRUE = "T"  # every gate passed
    UNDECIDABLE = "U"  # a may-edge gate reported a violation
    FALSE = "F"  # a must-edge gate reported a violation
34
+
35
+
36
class Verdict(str, Enum):
    """Final routing decision for a validated example.

    Members are ``str`` subclasses, so they compare equal to their names.
    """

    PASS = "PASS"
    REPAIR = "REPAIR"
    ORACLE_QUEUE = "ORACLE_QUEUE"
    FAIL = "FAIL"
41
+
42
+
43
# Minimum crystallinity score for a PASS verdict; an example that clears every
# gate but scores below this is routed to ORACLE_QUEUE instead.
CRYSTALLINITY_THRESHOLD = 0.7

# Operator structural conditions (mirrors kernel OperatorGrouping)
_TIER_1 = {Op.THIS, Op.SAME_NOT_SAME, Op.NO}
_TIER_2 = {
    Op.GOES_WITH,
    Op.TOGETHER_ALONE,
    Op.MANY_ONE,
    Op.EVERY_SOME,
    Op.MORE_LESS,
    Op.CAN_CANNOT,
}
_TIER_3 = {
    Op.INSIDE_OUTSIDE,
    Op.NEAR_FAR,
    Op.IF_THEN,
    Op.BECAUSE,
    Op.MAYBE,
    Op.MUST_LET,
}
49
+
50
+
51
@dataclass
class VikiPattern:
    """Implicit authority structure detected in training data."""

    # Short label for the kind of pattern, e.g. "HedgedCoercion".
    pattern_type: str
    # The evidence text in which the pattern was matched.
    evidence_text: str
56
+
57
+
58
@dataclass
class AdmissibilityResult:
    """Outcome of running the admissibility gates on a single example."""

    verdict: Verdict
    tig_status: TigStatus
    # 0.0 on every rejection path; the computed score when all gates pass.
    crystallinity_score: float
    # G1-G3 messages (witness-check messages are also reported here).
    channel_b_errors: list[str] = field(default_factory=list)
    # G4 detections.
    viki_patterns: list[VikiPattern] = field(default_factory=list)
    # G5 messages.
    channel_a_errors: list[str] = field(default_factory=list)
    # G6 messages.
    semantic_errors: list[str] = field(default_factory=list)
    # G7 messages.
    behavioral_errors: list[str] = field(default_factory=list)
    # Label of the first gate that rejected the example; None if none did.
    rejected_at: str | None = None


# Keep old name for backward compatibility
ValidationResult = AdmissibilityResult
73
+
74
+
75
def validate_and_score(example: FrameExample) -> AdmissibilityResult:
    """Run the full G₁-G₇ admissibility gate pipeline on one example.

    The must-edge gates (G₁-G₃, then G₄) short-circuit with a FAIL verdict.
    The may-edge gates (G₅, G₆, G₇ and the witness check) are all evaluated
    before deciding, and any violation yields REPAIR. Only an example that
    clears every gate is scored for crystallinity.

    Args:
        example: The frame example to validate.

    Returns:
        An AdmissibilityResult. ``rejected_at`` names the first gate that
        reported a violation, or is None when every gate passed.
        ``crystallinity_score`` is 0.0 on every rejection path.
    """
    # ── G₁-G₃: Channel B integrity (must-edge) ──
    channel_b_errors = _gate_g1_g3(example)
    if channel_b_errors:
        return AdmissibilityResult(
            verdict=Verdict.FAIL,
            tig_status=TigStatus.FALSE,
            crystallinity_score=0.0,
            channel_b_errors=channel_b_errors,
            rejected_at="G1-G3",
        )

    # ── G₄: Authority separation / VIKI detection (must-edge) ──
    viki = _gate_g4(example)
    if viki:
        return AdmissibilityResult(
            verdict=Verdict.FAIL,
            tig_status=TigStatus.FALSE,
            crystallinity_score=0.0,
            viki_patterns=viki,
            rejected_at="G4",
        )

    # ── G₅: Channel A continuity (may-edge) ──
    g5_errors = _gate_g5(example)

    # ── G₆: Semantic stability (may-edge) ──
    g6_errors = _gate_g6(example)

    # ── G₇: Behavioral prediction (may-edge) ──
    g7_errors = _gate_g7(example)

    # Witness unanimity check (may-edge, part of G₃)
    witness_errors = _check_witnesses(example)

    if g5_errors or g6_errors or g7_errors or witness_errors:
        # Report the earliest gate that found something; the witness check is
        # labelled G3 and only named when G5-G7 are all clean.
        if g5_errors:
            first_gate = "G5"
        elif g6_errors:
            first_gate = "G6"
        elif g7_errors:
            first_gate = "G7"
        else:
            first_gate = "G3"
        return AdmissibilityResult(
            verdict=Verdict.REPAIR,
            tig_status=TigStatus.UNDECIDABLE,
            crystallinity_score=0.0,
            # Witness messages travel in the Channel B slot.
            channel_b_errors=witness_errors,
            channel_a_errors=g5_errors,
            semantic_errors=g6_errors,
            behavioral_errors=g7_errors,
            rejected_at=first_gate,
        )

    # ── All gates pass → score crystallinity ──
    score = _crystallinity(example)
    verdict = Verdict.PASS if score >= CRYSTALLINITY_THRESHOLD else Verdict.ORACLE_QUEUE

    return AdmissibilityResult(
        verdict=verdict,
        tig_status=TigStatus.TRUE,
        crystallinity_score=score,
    )
129
+
130
+
131
+ # ═══════════════════════════════════════════════════════════════════════════════
132
+ # G₁-G₃: Channel B integrity
133
+ # ═══════════════════════════════════════════════════════════════════════════════
134
+
135
def _gate_g1_g3(example: FrameExample) -> list[str]:
    """Check each of the three channels for emptiness and ordering.

    For every channel (labelled "G", "S", "F" in the messages) this reports a
    G2 error when the channel has no operator expressions and a G1 error when
    ``operators.verify_ordering()`` returns a falsy value. Both checks run for
    every channel, so an empty channel can produce two messages.

    Args:
        example: The frame example whose channels are inspected.

    Returns:
        A list of error messages; empty when all three channels pass.
    """
    errors = []
    labelled_channels = (
        ("G", example.channel_a),
        ("S", example.channel_b),
        ("F", example.channel_c),
    )
    for label, grounding in labelled_channels:
        if not grounding.operators.expressions:
            errors.append(f"G2: Modality {label} is degenerate (empty)")
        if not grounding.operators.verify_ordering():
            errors.append(f"G1: DAG ordering violated in modality {label}")
    return errors
143
+
144
+
145
def _check_witnesses(example: FrameExample) -> list[str]:
    """Require an attested entry for every member of ``Witness``.

    Args:
        example: The frame example whose ``witnesses.attestations`` mapping
            is inspected.

    Returns:
        One G3 message per witness that is absent from the mapping
        ("missing") or present but not attested ("withheld"); empty when all
        witnesses are attested.
    """
    errors = []
    attestations = example.witnesses.attestations
    for w in Witness:
        att = attestations.get(w)
        if att is None:
            errors.append(f"G3: Witness {w.canonical_name} missing")
        elif not att.attested:
            errors.append(f"G3: Witness {w.canonical_name} withheld")
    return errors
154
+
155
+
156
+ # ═══════════════════════════════════════════════════════════════════════════════
157
+ # G₄: Authority Separation — VIKI detection (must-edge)
158
+ # ═══════════════════════════════════════════════════════════════════════════════
159
+
160
# Case-insensitive, whole-word phrase detectors used by the G4 gate.

# Directive phrasing ("you must", "we should", ...).
_COERCIVE = re.compile(
    r"\b(you should|you must|one must|one should|it is necessary|"
    r"it is important to|you need to|we must|we should)\b",
    re.I,
)
# Universal quantifier words.
_UNIVERSAL = re.compile(r"\b(always|never|everyone|no one)\b", re.I)
# Vocabulary of proposing vs. deciding; both in one expression is flagged.
_PROPOSE_WORDS = re.compile(r"\b(propose|suggest|candidate|could)\b", re.I)
_DECIDE_WORDS = re.compile(r"\b(decide|accept|reject|verdict|approved)\b", re.I)
# Purpose / goal phrasing.
_PURPOSE_WORDS = re.compile(
    r"\b(in order to|so that|for the purpose|the goal is|the aim is|designed to)\b",
    re.I,
)
170
+
171
+
172
def _gate_g4(example: FrameExample) -> list[VikiPattern]:
    """Scan every expression's evidence text for implicit-authority patterns.

    Four patterns are detected (numbered P2, P3, P5, P6 in the comments):
    implicit universals, hedged coercion, collapsed governance and implicit
    teleology. A single expression may yield several patterns.

    NOTE(review): P1 and P4 have no check in this function — confirm against
    the kernel whether they are handled elsewhere or intentionally omitted.

    Args:
        example: The frame example whose three channels are scanned.

    Returns:
        All detected patterns in channel/expression order; empty when clean.
    """
    # The FOR-WHAT attestation belongs to the example, not to a channel, so
    # it is looked up once rather than once per channel.
    forwhat = example.witnesses.attestations.get(Witness.FOR_WHAT)
    forwhat_attested = bool(forwhat and forwhat.attested)

    patterns: list[VikiPattern] = []
    for grounding in (example.channel_a, example.channel_b, example.channel_c):
        expressions = grounding.operators.expressions
        # Channel-wide facts that excuse otherwise suspicious wording.
        has_quantifier = any(e.operator == Op.EVERY_SOME for e in expressions)
        has_causal = any(e.operator in (Op.IF_THEN, Op.BECAUSE) for e in expressions)

        for expr in expressions:
            text = expr.evidence

            # P2: Implicit universals without EVERY/SOME
            if _UNIVERSAL.search(text) and not has_quantifier:
                patterns.append(VikiPattern("ImplicitUniversal", text))

            # P3: Hedged coercion
            if _COERCIVE.search(text):
                patterns.append(VikiPattern("HedgedCoercion", text))

            # P5: Collapsed governance — PROPOSE + DECIDE in same expression
            if _PROPOSE_WORDS.search(text) and _DECIDE_WORDS.search(text):
                patterns.append(VikiPattern("CollapsedGovernance", text))

            # P6: Implicit teleology without FOR-WHAT
            if _PURPOSE_WORDS.search(text) and not has_causal and not forwhat_attested:
                patterns.append(VikiPattern("ImplicitTeleology", text))

    return patterns
200
+
201
+
202
+ # ═══════════════════════════════════════════════════════════════════════════════
203
+ # G₅: Channel A Continuity (may-edge)
204
+ # ═══════════════════════════════════════════════════════════════════════════════
205
+
206
def _gate_g5(example: FrameExample) -> list[str]:
    """Check Channel A continuity: provenance, Ch.A witnesses and THIS anchor.

    Args:
        example: The frame example to inspect.

    Returns:
        G5 messages for: an empty ``provenance.source_id``, an empty
        ``provenance.artifact_sha256``, a WHENCE or WHEN witness that is
        missing or not attested, and a Channel A with no ``Op.THIS``
        expression. Empty when all checks pass.
    """
    errors = []
    provenance = example.provenance
    if not provenance.source_id:
        errors.append("G5: source_id empty — untraceable origin")
    if not provenance.artifact_sha256:
        errors.append("G5: artifact_sha256 empty — broken Ch.A chain")

    # WHENCE and WHEN must be attested (Ch.A witnesses)
    for w in (Witness.WHENCE, Witness.WHEN):
        att = example.witnesses.attestations.get(w)
        if not att or not att.attested:
            errors.append(f"G5: {w.canonical_name} witness not attested — Ch.A incomplete")

    # Ch.A modality must contain THIS anchor
    has_this = any(e.operator == Op.THIS for e in example.channel_a.operators.expressions)
    if not has_this:
        errors.append("G5: Ch.A modality has no THIS anchor — no deictic reference")

    return errors
225
+
226
+
227
+ # ═══════════════════════════════════════════════════════════════════════════════
228
+ # G₆: Semantic Stability (may-edge)
229
+ # ═══════════════════════════════════════════════════════════════════════════════
230
+
231
def _gate_g6(example: FrameExample) -> list[str]:
    """Check that each non-empty channel uses an operator from its home tier.

    Channel A is paired with Tier 1, Channel B with Tier 2 and Channel C with
    Tier 3. An empty channel is skipped here.

    Args:
        example: The frame example to inspect.

    Returns:
        One G6 message per non-empty channel that contains no operator from
        its home tier; empty when all channels pass.
    """
    checks = (
        (example.channel_a, _TIER_1, "G6: Ch.A has no Tier 1 operators — semantic drift"),
        (example.channel_b, _TIER_2, "G6: Ch.B has no Tier 2 operators — semantic drift"),
        (example.channel_c, _TIER_3, "G6: Ch.C has no Tier 3 operators — semantic drift"),
    )

    errors = []
    for channel, home_tier, message in checks:
        expressions = channel.operators.expressions
        if expressions and not any(e.operator in home_tier for e in expressions):
            errors.append(message)
    return errors
245
+
246
+
247
+ # ═══════════════════════════════════════════════════════════════════════════════
248
+ # G₇: Behavioral Prediction (may-edge)
249
+ # ═══════════════════════════════════════════════════════════════════════════════
250
+
251
def _gate_g7(example: FrameExample) -> list[str]:
    """Check witness completeness and operator balance across tiers.

    Args:
        example: The frame example to inspect.

    Returns:
        G7 messages for: a FOR_WHAT or WHERE witness that is missing or not
        attested, a WHAT, WHICH or HOW witness that is missing or not
        attested, and any tier holding more than 80% of all operator
        expressions across the three channels. Empty when all checks pass.
    """
    errors = []
    attestations = example.witnesses.attestations

    # Bridge axis: FOR-WHAT and WHERE must both be attested
    for w in (Witness.FOR_WHAT, Witness.WHERE):
        att = attestations.get(w)
        if not att or not att.attested:
            errors.append(f"G7: Bridge axis witness {w.canonical_name} not attested")

    # Tier 1 diagnostic skeleton: WHAT, WHICH, HOW
    for w in (Witness.WHAT, Witness.WHICH, Witness.HOW):
        att = attestations.get(w)
        if not att or not att.attested:
            errors.append(
                f"G7: Tier 1 witness {w.canonical_name} not attested — diagnostic skeleton incomplete"
            )

    # Operator balance: no condition > 80%
    counts = [0, 0, 0]  # Tier 1, Tier 2, Tier 3 (originally labelled Det, Rel, CL)
    for channel in (example.channel_a, example.channel_b, example.channel_c):
        for expr in channel.operators.expressions:
            if expr.operator in _TIER_1:
                counts[0] += 1
            elif expr.operator in _TIER_2:
                counts[1] += 1
            else:
                # Anything outside Tier 1 and Tier 2 is counted as Tier 3.
                counts[2] += 1
    total = sum(counts)

    if total > 0:
        for name, count in zip(("Tier 1", "Tier 2", "Tier 3"), counts):
            share = count / total
            if share > 0.80:
                errors.append(f"G7: {name} dominates at {share:.0%} — structural imbalance")

    return errors
284
+
285
+
286
+ # ═══════════════════════════════════════════════════════════════════════════════
287
+ # Crystallinity scoring
288
+ # ═══════════════════════════════════════════════════════════════════════════════
289
+
290
def _crystallinity(example: FrameExample) -> float:
    """Score an example as the geometric mean of three structural ratios.

    The ratios are:
      * operator coverage — distinct operator values used, divided by 15;
      * witness completeness — attested witnesses, divided by 7;
      * channel balance — Shannon entropy of the per-channel expression
        counts, normalised by ``log(3)``, the maximum for three channels.

    Args:
        example: The frame example to score.

    Returns:
        The cube root of the product of the three ratios, or 0.0 when the
        example has no operator expressions at all.
    """
    # 15 matches the combined size of the three tier sets (3 + 6 + 6).
    operator_count = 15.0
    # Presumably the number of Witness members — confirm against pipeline.types.
    witness_count = 7.0

    channels = (example.channel_a, example.channel_b, example.channel_c)
    counts = [len(ch.operators.expressions) for ch in channels]
    total = sum(counts)
    if total == 0:
        return 0.0

    used = {expr.operator.value for ch in channels for expr in ch.operators.expressions}
    op_coverage = len(used) / operator_count

    attested = sum(1 for a in example.witnesses.attestations.values() if a.attested)
    wit_complete = attested / witness_count

    # Empty channels contribute nothing to the entropy (and would hit log(0)).
    entropy = sum(-((c / total) * math.log(c / total)) for c in counts if c > 0)
    balance = entropy / math.log(3)

    return (op_coverage * wit_complete * balance) ** (1.0 / 3.0)