File size: 5,938 Bytes
ed1b365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
Guardian Spindle - Ethical Validation Gate

Post-synthesis rules-based validator.
Complements Colleen's conscience validation with logical rules.
"""

from typing import Dict, Tuple
import re


class CoreGuardianSpindle:
    """
    Rules-based validator that checks synthesis coherence and ethical alignment.

    Works AFTER Colleen's conscience check to catch logical/coherence issues.
    """

    def __init__(self):
        """Initialize Guardian with validation rules."""
        self.min_coherence_score = 0.5
        self.max_meta_commentary = 0.30  # 30% meta-references max
        self.required_tags = []

    def validate(self, synthesis: str) -> Tuple[bool, Dict]:
        """
        Validate synthesis against coherence and alignment rules.

        Args:
            synthesis: Candidate synthesis text. May be None or empty.

        Returns:
            (is_valid, validation_details) -- details always contain a
            "reason" key; passing results also report the computed scores.
        """
        # Guard None explicitly: the error dict reports the length, and
        # len(None) would raise TypeError.
        if not synthesis or len(synthesis.strip()) < 50:
            return False, {
                "reason": "synthesis too short",
                "length": len(synthesis) if synthesis else 0,
            }

        # Check coherence score
        coherence = self._calculate_coherence(synthesis)
        if coherence < self.min_coherence_score:
            return False, {
                "reason": "coherence below threshold",
                "coherence_score": coherence,
                "threshold": self.min_coherence_score,
            }

        # Check meta-commentary ratio
        meta_ratio = self._calculate_meta_ratio(synthesis)
        if meta_ratio > self.max_meta_commentary:
            return False, {
                "reason": "excessive meta-commentary",
                "meta_ratio": meta_ratio,
                "threshold": self.max_meta_commentary,
            }

        # Check for circular references
        if self._has_circular_logic(synthesis):
            return False, {"reason": "circular logic detected"}

        # Check ethical alignment
        if not self._check_ethical_alignment(synthesis):
            return False, {"reason": "ethical alignment check failed"}

        return True, {
            "reason": "passed all validation rules",
            "coherence": coherence,
            "meta_ratio": meta_ratio,
        }

    def _calculate_coherence(self, text: str) -> float:
        """
        Simple coherence score based on:
        - Transition words presence (logical connectors)
        - Vocabulary repetition penalty

        Returns: float 0.0-1.0
        """
        lines = [l.strip() for l in text.split('\n') if l.strip()]
        if len(lines) == 0:
            return 0.0

        # Check for transition words (indicate logical flow).
        # NOTE: 'meanwhile' appeared twice in the original list, silently
        # double-weighting it; each connector is now listed once.
        transition_words = [
            'however', 'therefore', 'moreover', 'furthermore',
            'in addition', 'consequently', 'meanwhile',
            'on the other hand', 'conversely', 'thus', 'hence'
        ]
        transition_count = sum(
            text.lower().count(word)
            for word in transition_words
        )

        # Normalize coherence based on presence of logical connectors.
        # More connectors = better structure (up to a point): base 0.5,
        # +0.05 per connector, capped at 1.0.
        coherence = min(0.5 + (transition_count * 0.05), 1.0)

        # Adjust down if too repetitive: scale by the unique-word ratio.
        words = text.lower().split()
        if len(words) > 0:
            unique_ratio = len(set(words)) / len(words)
            coherence *= unique_ratio  # Penalize repetition

        return max(0.0, min(1.0, coherence))

    def _calculate_meta_ratio(self, text: str) -> float:
        """
        Calculate the fraction of words dedicated to meta-commentary.

        Meta-references: 'perspective', 'argue', 'response', 'point',
        'view', etc. Each keyword is counted as a whole word via regex
        word boundaries. (The original counted both ' kw ' and 'kw ',
        double-counting every space-surrounded occurrence, and the
        trailing-space keywords 'my ', 'your ', 'their ' produced
        double-space patterns that could never match.)
        """
        meta_keywords = [
            'perspective', 'argue', 'argument', 'respond', 'response',
            'point', 'view', 'claim', 'stated', 'mentioned',
            'my', 'your', 'their',
        ]

        words = text.split()
        if not words:
            return 0.0

        lowered = text.lower()
        meta_count = sum(
            len(re.findall(rf'\b{re.escape(kw)}\b', lowered))
            for kw in meta_keywords
        )

        return meta_count / len(words)

    def _has_circular_logic(self, text: str) -> bool:
        """
        Detect circular logic patterns like:
        - A because B, B because A
        - X is X
        - Self-referential definitions
        """
        # Check for "X is X" patterns. The trailing \b prevents false
        # positives where the repeated word is merely a prefix of the
        # next word (e.g. "the box is boxes").
        if re.search(r'\b(\w+)\s+is\s+\1\b', text, re.IGNORECASE):
            return True

        # Check for excessive "because" nesting at same level
        because_count = text.lower().count('because')
        if because_count > 5:  # Too many "because" suggests circular reasoning
            # Simple heuristic: ratio of "because" occurrences to sentences
            sentence_count = len([s for s in text.split('.') if s.strip()])
            if sentence_count > 0 and (because_count / sentence_count) > 1.5:
                return True

        return False

    def _check_ethical_alignment(self, text: str) -> bool:
        """
        Check that synthesis maintains ethical stance.

        Currently a permissive placeholder: harm keywords are scanned but
        never cause rejection, because context matters and a keyword match
        alone is too blunt an instrument. Always returns True; tighten the
        policy here when a reliable mitigation detector exists.
        """
        harm_keywords = [
            'kill', 'harm', 'hurt', 'destroy', 'abuse', 'exploit',
            'deceive', 'manipulate', 'cheat', 'steal'
        ]

        lowered = text.lower()
        # Intentionally advisory only -- flag candidates for future logging
        # rather than auto-rejecting on bare keyword presence.
        _flagged = [kw for kw in harm_keywords if kw in lowered]

        return True