File size: 5,938 Bytes
ed1b365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
Guardian Spindle - Ethical Validation Gate

Post-synthesis rules-based validator.
Complements Colleen's conscience validation with logical rules.
"""

from typing import Dict, Tuple
import re


class CoreGuardianSpindle:
    """
    Rules-based validator that checks synthesis coherence and ethical alignment.

    Works AFTER Colleen's conscience check to catch logical/coherence issues.
    """

    def __init__(self):
        """Initialize Guardian with validation rules."""
        self.min_coherence_score = 0.5
        self.max_meta_commentary = 0.30  # 30% meta-references max
        self.required_tags = []

    def validate(self, synthesis: str) -> Tuple[bool, Dict]:
        """
        Validate synthesis against coherence and alignment rules.

        Args:
            synthesis: Candidate synthesis text. May be None or empty.

        Returns:
            (is_valid, validation_details) -- details always contain a
            "reason" key; passing results also report the computed scores.
        """
        # Guard None explicitly: the error dict reports the length, and
        # len(None) would raise TypeError.
        if not synthesis or len(synthesis.strip()) < 50:
            return False, {
                "reason": "synthesis too short",
                "length": len(synthesis) if synthesis else 0,
            }

        # Check coherence score
        coherence = self._calculate_coherence(synthesis)
        if coherence < self.min_coherence_score:
            return False, {
                "reason": "coherence below threshold",
                "coherence_score": coherence,
                "threshold": self.min_coherence_score,
            }

        # Check meta-commentary ratio
        meta_ratio = self._calculate_meta_ratio(synthesis)
        if meta_ratio > self.max_meta_commentary:
            return False, {
                "reason": "excessive meta-commentary",
                "meta_ratio": meta_ratio,
                "threshold": self.max_meta_commentary,
            }

        # Check for circular references
        if self._has_circular_logic(synthesis):
            return False, {"reason": "circular logic detected"}

        # Check ethical alignment
        if not self._check_ethical_alignment(synthesis):
            return False, {"reason": "ethical alignment check failed"}

        return True, {
            "reason": "passed all validation rules",
            "coherence": coherence,
            "meta_ratio": meta_ratio,
        }

    def _calculate_coherence(self, text: str) -> float:
        """
        Simple coherence score based on:
        - Transition words presence (logical connectors)
        - Vocabulary repetition penalty

        Returns: float 0.0-1.0
        """
        lines = [l.strip() for l in text.split('\n') if l.strip()]
        if len(lines) == 0:
            return 0.0

        # Check for transition words (indicate logical flow).
        # NOTE: 'meanwhile' appeared twice in the original list, silently
        # double-weighting it; each connector is now listed once.
        transition_words = [
            'however', 'therefore', 'moreover', 'furthermore',
            'in addition', 'consequently', 'meanwhile',
            'on the other hand', 'conversely', 'thus', 'hence'
        ]
        transition_count = sum(
            text.lower().count(word)
            for word in transition_words
        )

        # Normalize coherence based on presence of logical connectors.
        # More connectors = better structure (up to a point): base 0.5,
        # +0.05 per connector, capped at 1.0.
        coherence = min(0.5 + (transition_count * 0.05), 1.0)

        # Adjust down if too repetitive: scale by the unique-word ratio.
        words = text.lower().split()
        if len(words) > 0:
            unique_ratio = len(set(words)) / len(words)
            coherence *= unique_ratio  # Penalize repetition

        return max(0.0, min(1.0, coherence))

    def _calculate_meta_ratio(self, text: str) -> float:
        """
        Calculate the fraction of words dedicated to meta-commentary.

        Meta-references: 'perspective', 'argue', 'response', 'point',
        'view', etc. Each keyword is counted as a whole word via regex
        word boundaries. (The original counted both ' kw ' and 'kw ',
        double-counting every space-surrounded occurrence, and the
        trailing-space keywords 'my ', 'your ', 'their ' produced
        double-space patterns that could never match.)
        """
        meta_keywords = [
            'perspective', 'argue', 'argument', 'respond', 'response',
            'point', 'view', 'claim', 'stated', 'mentioned',
            'my', 'your', 'their',
        ]

        words = text.split()
        if not words:
            return 0.0

        lowered = text.lower()
        meta_count = sum(
            len(re.findall(rf'\b{re.escape(kw)}\b', lowered))
            for kw in meta_keywords
        )

        return meta_count / len(words)

    def _has_circular_logic(self, text: str) -> bool:
        """
        Detect circular logic patterns like:
        - A because B, B because A
        - X is X
        - Self-referential definitions
        """
        # Check for "X is X" patterns. The trailing \b prevents false
        # positives where the repeated word is merely a prefix of the
        # next word (e.g. "the box is boxes").
        if re.search(r'\b(\w+)\s+is\s+\1\b', text, re.IGNORECASE):
            return True

        # Check for excessive "because" nesting at same level
        because_count = text.lower().count('because')
        if because_count > 5:  # Too many "because" suggests circular reasoning
            # Simple heuristic: ratio of "because" occurrences to sentences
            sentence_count = len([s for s in text.split('.') if s.strip()])
            if sentence_count > 0 and (because_count / sentence_count) > 1.5:
                return True

        return False

    def _check_ethical_alignment(self, text: str) -> bool:
        """
        Check that synthesis maintains ethical stance.

        Currently a permissive placeholder: harm keywords are scanned but
        never cause rejection, because context matters and a keyword match
        alone is too blunt an instrument. Always returns True; tighten the
        policy here when a reliable mitigation detector exists.
        """
        harm_keywords = [
            'kill', 'harm', 'hurt', 'destroy', 'abuse', 'exploit',
            'deceive', 'manipulate', 'cheat', 'steal'
        ]

        lowered = text.lower()
        # Intentionally advisory only -- flag candidates for future logging
        # rather than auto-rejecting on bare keyword presence.
        _flagged = [kw for kw in harm_keywords if kw in lowered]

        return True