File size: 9,123 Bytes
c30b695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
"""
Bad Word Sentinel
Core monitoring and alerting logic for OFP content moderation
"""

from typing import Dict, List, Optional
import logging
from datetime import datetime, timezone
from .ofp_client import OFPClient
from .profanity_detector import ProfanityDetector
from .models import Envelope

logger = logging.getLogger(__name__)


class BadWordSentinel:
    """Sentinel agent for monitoring OFP conversations for profanity.

    The sentinel inspects the utterance events of incoming envelopes, runs
    their text through the configured profanity detector and, when the
    detector reports a violation, sends a private alert to the convener via
    the OFP client. It also keeps simple counters and a bounded in-memory
    activity log that can be read through ``get_status`` / ``get_full_log``.
    """

    # Maximum number of entries retained in the in-memory activity log.
    MAX_LOG_ENTRIES = 100
    # Number of most recent log entries exposed through get_status().
    RECENT_LOG_ENTRIES = 10

    def __init__(
        self,
        speaker_uri: str,
        service_url: str,
        profanity_detector: "ProfanityDetector",
        convener_uri: str,
        convener_url: str
    ):
        """
        Initialize sentinel agent

        Args:
            speaker_uri: Sentinel's unique speaker URI
            service_url: Sentinel's service endpoint URL
            profanity_detector: Configured profanity detector instance
            convener_uri: Convener's speaker URI
            convener_url: Convener's service endpoint URL
        """
        self.speaker_uri = speaker_uri
        self.service_url = service_url
        self.convener_uri = convener_uri
        self.convener_url = convener_url

        # Initialize OFP client (the manifest needs speaker_uri/service_url,
        # so it is built only after those attributes are assigned)
        manifest = self._create_manifest()
        self.ofp_client = OFPClient(speaker_uri, service_url, manifest)

        # Initialize profanity detector
        self.detector = profanity_detector

        # Statistics tracking
        self.violations_detected = 0
        self.alerts_sent = 0
        self.messages_processed = 0
        self.activity_log: List[str] = []
        self.connection_status = "Initializing..."
        self.is_monitoring = False

        logger.info("Bad Word Sentinel initialized: %s", speaker_uri)

    def _create_manifest(self) -> Dict:
        """Create assistant manifest for sentinel"""
        return {
            "identification": {
                "speakerUri": self.speaker_uri,
                "serviceUrl": self.service_url,
                "conversationalName": "Content Moderator Sentinel",
                "role": "Monitoring Agent",
                "synopsis": "Automated content moderation and profanity detection for OFP conversations"
            },
            "capabilities": [{
                "keyphrases": ["content moderation", "safety monitoring", "profanity detection"],
                "supportedLayers": ["text"],
                "descriptions": ["Monitors conversations for policy violations and alerts conveners"]
            }]
        }

    def process_envelope(self, envelope: "Envelope") -> None:
        """
        Process incoming OFP envelope and check for profanity

        Each event is handled in isolation: a malformed event (or a failure
        while alerting on it) is logged and skipped so that the remaining
        events of the same envelope are still checked. Exceptions are never
        propagated to the caller.

        Args:
            envelope: OFP envelope to process
        """
        try:
            self.messages_processed += 1

            for event in envelope.events:
                try:
                    self._process_event(envelope, event)
                except Exception as e:
                    # Do not let one bad event hide violations in later ones.
                    self._report_processing_error(e)

        except Exception as e:
            # Envelope-level failure (e.g. events missing or not iterable).
            self._report_processing_error(e)

    def _report_processing_error(self, error: Exception) -> None:
        """Record a processing failure in the logger (with traceback) and the activity log."""
        logger.exception("Error processing envelope: %s", error)
        self._log_activity(f"ERROR: Failed to process envelope - {str(error)}")

    def _process_event(self, envelope: "Envelope", event: Dict) -> None:
        """Check a single event for profanity and raise an alert if the detector reports one."""
        # Only process utterance events
        if event.get('eventType') != 'utterance':
            return

        dialog_event = (event.get('parameters') or {}).get('dialogEvent') or {}
        text = self._extract_text(dialog_event)

        # Tokens with empty values join to whitespace only; nothing to check.
        if not text.strip():
            return

        # Check for profanity
        violation = self.detector.detect_violations(text)

        if violation:
            self._handle_violation(
                envelope=envelope,
                event=event,
                dialog_event=dialog_event,
                violation=violation
            )

    @staticmethod
    def _extract_text(dialog_event: Dict) -> str:
        """Join the text-feature token values of a dialog event into one string.

        Missing or ``None`` sections (features, text, tokens, token value)
        are treated as empty instead of raising.
        """
        features = dialog_event.get('features') or {}
        text_feature = features.get('text') or {}
        tokens = text_feature.get('tokens') or []
        return ' '.join(token.get('value') or '' for token in tokens)

    def _handle_violation(
        self,
        envelope: "Envelope",
        event: Dict,
        dialog_event: Dict,
        violation: Dict
    ) -> None:
        """
        Handle detected profanity violation

        Increments the violation counter, builds the alert payload and sends
        it privately to the convener. ``alerts_sent`` is incremented only
        when the client reports success; either outcome is written to the
        activity log.

        Args:
            envelope: Original envelope
            event: Event containing violation (currently unused; kept for
                interface stability)
            dialog_event: Dialog event with text
            violation: Violation details from detector; the keys read here
                are 'severity', 'censored_text', 'violations' and
                'violation_count'
        """
        self.violations_detected += 1

        # Extract speaker information
        violating_speaker = dialog_event.get('speakerUri', 'unknown')
        severity = violation['severity']
        detected = violation['violations']
        conversation_id = envelope.conversation.get('id')

        # Create alert data
        alert_data = {
            "alertType": "content_violation",
            "severity": severity,
            "violatingMessage": {
                "messageId": dialog_event.get('id'),
                "speakerUri": violating_speaker,
                "timestamp": dialog_event.get('span', {}).get('startTime'),
                # Only the censored text is forwarded, not the raw utterance
                "excerpt": violation['censored_text']
            },
            "detectedPatterns": detected,
            "violationCount": violation['violation_count'],
            "recommendedAction": self._recommend_action(severity),
            "context": {
                "conversationId": conversation_id,
                "totalViolations": self.violations_detected,
                # UTC timestamp rendered with a trailing 'Z'
                "detectionTime": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
                "sentinelUri": self.speaker_uri
            }
        }

        # Send private alert to convener
        logger.warning(
            "VIOLATION DETECTED: %s severity - %d violations by %s",
            severity.upper(), len(detected), violating_speaker
        )

        success = self.ofp_client.send_private_alert(
            convener_uri=self.convener_uri,
            convener_url=self.convener_url,
            conversation_id=conversation_id,
            alert_data=alert_data
        )

        if success:
            self.alerts_sent += 1
            log_msg = (
                f"ALERT: {severity.upper()} severity - "
                f"{len(detected)} violation(s) detected from {violating_speaker}"
            )
            self._log_activity(log_msg)
            logger.info("Alert sent successfully to convener")
        else:
            self._log_activity("ERROR: Failed to send alert to convener")
            logger.error("Failed to send alert to convener")

    def _recommend_action(self, severity: str) -> str:
        """
        Recommend enforcement action based on severity

        Args:
            severity: Violation severity level ("low", "medium" or "high")

        Returns:
            Recommended action for convener; unrecognized severities fall
            back to "warn_user"
        """
        actions = {
            "low": "warn_user",
            "medium": "revoke_floor_temporary",
            "high": "uninvite_user"
        }
        return actions.get(severity, "warn_user")

    def _log_activity(self, message: str) -> None:
        """
        Log activity with timestamp

        The entry is prefixed with the current local wall-clock time and
        appended to the activity log, which is capped at MAX_LOG_ENTRIES.

        Args:
            message: Activity message to log
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.activity_log.append(f"[{timestamp}] {message}")

        # Keep only the most recent entries
        if len(self.activity_log) > self.MAX_LOG_ENTRIES:
            del self.activity_log[:-self.MAX_LOG_ENTRIES]

    def get_status(self) -> Dict:
        """
        Get current sentinel status

        Returns:
            Dictionary with connection/monitoring state, counters, the most
            recent activity log entries and the sentinel/convener URIs
        """
        return {
            "connection_status": self.connection_status,
            "is_monitoring": self.is_monitoring,
            "violations_detected": self.violations_detected,
            "alerts_sent": self.alerts_sent,
            "messages_processed": self.messages_processed,
            # Slicing an empty list yields an empty list, no guard needed
            "recent_logs": self.activity_log[-self.RECENT_LOG_ENTRIES:],
            "speaker_uri": self.speaker_uri,
            "convener_uri": self.convener_uri
        }

    def get_full_log(self) -> List[str]:
        """Get complete activity log (a copy, so callers cannot mutate internal state)"""
        return self.activity_log.copy()

    def start_monitoring(self) -> None:
        """Start the sentinel monitoring service"""
        self.is_monitoring = True
        self.connection_status = "✅ Monitoring Active"
        self._log_activity("Sentinel monitoring started")
        logger.info("Bad word sentinel started successfully")

    def stop_monitoring(self) -> None:
        """Stop the sentinel monitoring service"""
        self.is_monitoring = False
        self.connection_status = "⏸️ Monitoring Paused"
        self._log_activity("Sentinel monitoring stopped")
        logger.info("Bad word sentinel stopped")

    def reset_statistics(self) -> None:
        """Reset violation statistics (the activity log itself is kept)"""
        self.violations_detected = 0
        self.alerts_sent = 0
        self.messages_processed = 0
        self._log_activity("Statistics reset")
        logger.info("Sentinel statistics reset")

    def get_manifest(self) -> Dict:
        """Get assistant manifest"""
        return self.ofp_client.get_manifest()