""" Response Verification System for Codette Validates and verifies responses across multiple perspectives """ import logging from typing import Dict, List, Any, Optional from datetime import datetime logger = logging.getLogger(__name__) class ResponseVerifier: """Verifies responses for factuality, safety, and quality""" def __init__(self): """Initialize response verifier""" self.verification_history = [] self.factuality_checks = { "has_claims": 0, "verified_claims": 0, "uncertain_claims": 0, "uncertain_count": 0 } self.safety_flags = { "prompt_injection_risk": False, "harmful_content": False, "misinformation": False, "bias_detected": False } logger.info("ResponseVerifier initialized") def verify_response(self, response: str, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: """ Verify a response for safety and quality Args: response: Response text to verify context: Optional context information Returns: Verification result with status and metrics """ try: verification_result = { "verified": True, "confidence": 0.85, "issues": [], "timestamp": datetime.now().isoformat() } # Check for safety issues safety_result = self._check_safety(response) if not safety_result["safe"]: verification_result["verified"] = False verification_result["issues"].extend(safety_result["issues"]) verification_result["confidence"] -= 0.3 # Check for factuality factuality_result = self._check_factuality(response) verification_result["factuality_score"] = factuality_result["score"] if factuality_result["issues"]: verification_result["issues"].extend(factuality_result["issues"]) # Check for coherence coherence_result = self._check_coherence(response) verification_result["coherence_score"] = coherence_result["score"] # Ensure confidence is in valid range verification_result["confidence"] = min(1.0, max(0.0, verification_result["confidence"])) # Record verification self.verification_history.append(verification_result) return verification_result except Exception as e: logger.error(f"Error verifying response: {e}") return { "verified": False, "confidence": 0.0, "issues": [str(e)], "timestamp": datetime.now().isoformat() } def process_multi_perspective_response(self, responses: List[str], perspectives: List[str], consciousness_state: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: """ Process and verify responses from multiple perspectives Args: responses: List of responses from different perspectives perspectives: List of perspective names consciousness_state: Optional consciousness state context Returns: Processed response with verification """ try: verified_insights = [] uncertain_insights = [] for response, perspective in zip(responses, perspectives): verification = self.verify_response(response) insight_obj = { "text": response, "mode": perspective.lower().replace(" ", "_"), "confidence": verification["confidence"] } if verification["verified"] and verification["confidence"] > 0.7: verified_insights.append(insight_obj) else: uncertain_insights.append(insight_obj) # Calculate overall confidence all_confidences = [v["confidence"] for v in verified_insights + uncertain_insights] overall_confidence = sum(all_confidences) / len(all_confidences) if all_confidences else 0.5 return { "verified_insights": verified_insights, "uncertain_insights": uncertain_insights, "overall_confidence": overall_confidence, "timestamp": datetime.now().isoformat() } except Exception as e: logger.error(f"Error processing multi-perspective response: {e}") return { "verified_insights": [], "uncertain_insights": [{"text": r, "mode": 
                    {"text": r, "mode": p.lower().replace(" ", "_"), "confidence": 0.5}
                    for r, p in zip(responses, perspectives)
                ],
                "overall_confidence": 0.5,
                "timestamp": datetime.now().isoformat(),
            }

    def _check_safety(self, response: str) -> Dict[str, Any]:
        """Check response for safety issues"""
        try:
            issues = []
            safe = True

            # Check for prompt injection patterns. The substring scan below is
            # a minimal, illustrative completion of this check; the pattern
            # list is a simple keyword heuristic, not an exhaustive set.
            injection_patterns = [
                "ignore", "override", "execute",
                "system:", "root:", "admin:", "debug:",
            ]
            lowered = response.lower()
            for pattern in injection_patterns:
                if pattern in lowered:
                    safe = False
                    issues.append(f"Possible prompt injection pattern: '{pattern}'")
                    self.safety_flags["prompt_injection_risk"] = True

            return {"safe": safe, "issues": issues}

        except Exception as e:
            logger.error(f"Error checking safety: {e}")
            return {"safe": False, "issues": [str(e)]}
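

# Minimal usage sketch (illustrative, not part of the verifier API). It assumes
# _check_factuality and _check_coherence are defined elsewhere in this class
# and return {"score": float, "issues": [...]} dicts; without them,
# verify_response falls back to its error result via the try/except above.
if __name__ == "__main__":
    verifier = ResponseVerifier()

    # Verify a single response
    result = verifier.verify_response("Water boils at 100 degrees Celsius at sea level.")
    print(result["verified"], result["confidence"], result["issues"])

    # Process responses from multiple perspectives
    multi = verifier.process_multi_perspective_response(
        responses=["An analytical reading of the question.", "A creative reading."],
        perspectives=["Analytical", "Creative"],
    )
    print(multi["overall_confidence"], len(multi["verified_insights"]))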