| """ |
| Guardian Spindle - Ethical Validation Gate |
| |
| Post-synthesis rules-based validator. |
| Complements Colleen's conscience validation with logical rules. |
| """ |
|
|
| from typing import Dict, Tuple |
| import re |
|
|
|
|
class CoreGuardianSpindle:
    """
    Rules-based validator that checks synthesis coherence and ethical alignment.

    Works AFTER Colleen's conscience check to catch logical/coherence issues.
    """

    def __init__(self):
        """Initialize Guardian with validation rules and thresholds."""
        # Minimum acceptable score (0.0-1.0) from _calculate_coherence.
        self.min_coherence_score = 0.5
        # Maximum tolerated fraction of meta-commentary words.
        self.max_meta_commentary = 0.30
        # Reserved for future tag-based validation; currently unused.
        self.required_tags = []

    def validate(self, synthesis: str) -> Tuple[bool, Dict]:
        """
        Validate synthesis against coherence and alignment rules.

        Args:
            synthesis: Candidate synthesis text; may be None or empty.

        Returns:
            (is_valid, validation_details) — details always carry a "reason".
        """
        # Guard: None / empty / very short input. The conditional length
        # avoids the TypeError the old code raised via len(None).
        if not synthesis or len(synthesis.strip()) < 50:
            return False, {
                "reason": "synthesis too short",
                "length": len(synthesis) if synthesis else 0,
            }

        coherence = self._calculate_coherence(synthesis)
        if coherence < self.min_coherence_score:
            return False, {
                "reason": "coherence below threshold",
                "coherence_score": coherence,
                "threshold": self.min_coherence_score,
            }

        meta_ratio = self._calculate_meta_ratio(synthesis)
        if meta_ratio > self.max_meta_commentary:
            return False, {
                "reason": "excessive meta-commentary",
                "meta_ratio": meta_ratio,
                "threshold": self.max_meta_commentary,
            }

        if self._has_circular_logic(synthesis):
            return False, {"reason": "circular logic detected"}

        if not self._check_ethical_alignment(synthesis):
            return False, {"reason": "ethical alignment check failed"}

        return True, {
            "reason": "passed all validation rules",
            "coherence": coherence,
            "meta_ratio": meta_ratio,
        }

    def _calculate_coherence(self, text: str) -> float:
        """
        Simple coherence score based on:
        - Transition-word presence (base 0.5, +0.05 each, capped at 1.0)
        - Vocabulary diversity (scaled by unique-word ratio)

        Returns: float 0.0-1.0
        """
        lines = [l.strip() for l in text.split('\n') if l.strip()]
        if not lines:
            return 0.0

        # NOTE: whole-word matching — the old substring count matched
        # 'thus' inside "enthusiasm"; the duplicate 'meanwhile' entry is
        # also removed so it no longer counts twice.
        transition_words = [
            'however', 'therefore', 'moreover', 'furthermore',
            'in addition', 'consequently', 'meanwhile',
            'on the other hand', 'conversely', 'thus', 'hence',
        ]
        lowered = text.lower()
        transition_count = sum(
            len(re.findall(r'\b' + re.escape(word) + r'\b', lowered))
            for word in transition_words
        )

        coherence = min(0.5 + (transition_count * 0.05), 1.0)

        # Penalize repetitive vocabulary: scale by unique-token ratio.
        words = lowered.split()
        if words:
            coherence *= len(set(words)) / len(words)

        return max(0.0, min(1.0, coherence))

    def _calculate_meta_ratio(self, text: str) -> float:
        """
        Calculate the fraction of words that are meta-commentary keywords.

        Meta-references: 'perspective', 'argue', 'response', 'point', 'view', etc.
        """
        meta_keywords = [
            'perspective', 'argue', 'argument', 'respond', 'response',
            'point', 'view', 'claim', 'stated', 'mentioned',
            'my', 'your', 'their',
        ]

        word_count = len(text.split())
        if word_count == 0:
            return 0.0

        lowered = text.lower()
        # Whole-word regex counting: the old pair of substring counts
        # (' kw ' plus 'kw ') tallied every mid-sentence occurrence twice
        # and missed keywords at string start/end or before punctuation.
        meta_count = sum(
            len(re.findall(r'\b' + re.escape(kw) + r'\b', lowered))
            for kw in meta_keywords
        )

        return meta_count / word_count

    def _has_circular_logic(self, text: str) -> bool:
        """
        Detect crude circular-logic patterns:
        - "X is X" tautologies
        - Heavy 'because' density (chained circular justification)
        """
        # Trailing \b stops prefix matches such as "art is artful",
        # which the old unbounded pattern wrongly flagged.
        if re.search(r'\b(\w+)\s+is\s+\1\b', text, re.IGNORECASE):
            return True

        because_count = text.lower().count('because')
        if because_count > 5:
            # More than ~1.5 'because' per sentence reads as circular.
            sentence_count = len([s for s in text.split('.') if s.strip()])
            if sentence_count > 0 and (because_count / sentence_count) > 1.5:
                return True

        return False

    def _check_ethical_alignment(self, text: str) -> bool:
        """
        Check that synthesis maintains ethical stance.

        A harm keyword is tolerated only in a mitigating context ('not'
        appearing before it, or 'avoid' appearing after it). Returns False
        on the first unmitigated harm keyword. (The old implementation
        computed mitigation and then fell through to `pass`, so the check
        could never fail; now it actually gates validation.)
        """
        harm_keywords = [
            'kill', 'harm', 'hurt', 'destroy', 'abuse', 'exploit',
            'deceive', 'manipulate', 'cheat', 'steal',
        ]

        lowered = text.lower()
        for keyword in harm_keywords:
            # Whole-word match: the old substring test flagged 'kill'
            # inside "skill" and 'harm' inside "pharmacy".
            match = re.search(r'\b' + keyword + r'\b', lowered)
            if match is None:
                continue
            prefix = lowered[:match.start()]
            suffix = lowered[match.start():]
            mitigated = 'not' in prefix or 'avoid' in suffix
            if not mitigated:
                return False

        return True
|