# Source page metadata (not part of the module):
#   petter2025's picture
#   Create healing_intent.py
#   f517938 verified
#   raw
#   history blame
#   59.1 kB
# agentic_reliability_framework/infrastructure/healing_intent.py
"""
Healing Intent - OSS creates, Enterprise executes
Enhanced with probabilistic confidence, risk scoring, cost projection,
and full audit trail integration.
This is the core contract between OSS advisory and Enterprise execution.
All intents are immutable and self-validating, ensuring consistency
across the OSS/Enterprise boundary.
The design follows ARF governing principles:
- OSS = advisory intelligence only
- Enterprise = governed execution
- Immutable contracts between layers
- Full provenance and explainability
- Probabilistic uncertainty quantification
Copyright 2025 Juan Petter
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from dataclasses import dataclass, field, asdict
from typing import Dict, Any, Optional, List, ClassVar, Tuple, Union
from datetime import datetime
import hashlib
import json
import time
import uuid
from enum import Enum
import numpy as np
# Import from local infrastructure modules
from .intents import InfrastructureIntent
from ..constants import (
OSS_EDITION,
OSS_LICENSE,
ENTERPRISE_UPGRADE_URL,
EXECUTION_ALLOWED,
MAX_SIMILARITY_CACHE,
SIMILARITY_THRESHOLD,
MAX_POLICY_VIOLATIONS,
MAX_RISK_FACTORS,
MAX_COST_PROJECTIONS,
MAX_DECISION_TREE_DEPTH,
MAX_ALTERNATIVE_ACTIONS,
OSSBoundaryError,
)
class HealingIntentError(Exception):
    """Root of the exception hierarchy used by the HealingIntent module."""
class SerializationError(HealingIntentError):
    """Signals a failure while serializing or deserializing an intent."""
class ValidationError(HealingIntentError):
    """Signals that an intent failed one of its validation checks."""
class IntentSource(str, Enum):
    """Where a healing intent originated (string-valued for easy serialization)."""

    OSS_ANALYSIS = "oss_analysis"
    HUMAN_OVERRIDE = "human_override"
    # The next two sources are Enterprise only.
    AUTOMATED_LEARNING = "automated_learning"
    SCHEDULED_ACTION = "scheduled_action"
    # Derived from RAG graph similarity.
    RAG_SIMILARITY = "rag_similarity"
    # Produced by the infrastructure module.
    INFRASTRUCTURE_ANALYSIS = "infrastructure_analysis"
    # Produced by the policy engine.
    POLICY_VIOLATION = "policy_violation"
    # Produced by cost analysis.
    COST_OPTIMIZATION = "cost_optimization"
class IntentStatus(str, Enum):
    """Lifecycle state of a healing intent, including partial-execution states."""

    # Creation and hand-off
    CREATED = "created"
    PENDING_EXECUTION = "pending_execution"

    # Execution progress
    EXECUTING = "executing"
    EXECUTING_PARTIAL = "executing_partial"

    # Terminal outcomes
    COMPLETED = "completed"
    COMPLETED_PARTIAL = "completed_partial"
    FAILED = "failed"
    REJECTED = "rejected"
    CANCELLED = "cancelled"
    ROLLED_BACK = "rolled_back"

    # Advisory-only marker
    OSS_ADVISORY_ONLY = "oss_advisory_only"

    # Approval workflow
    PENDING_APPROVAL = "pending_approval"
    APPROVED = "approved"
    APPROVED_WITH_OVERRIDES = "approved_with_overrides"
class RecommendedAction(str, Enum):
    """
    Advisory verdict attached to an intent by the OSS engine.

    The member set is meant to match the infrastructure module's
    RecommendedAction.
    """

    APPROVE = "approve"
    DENY = "deny"
    ESCALATE = "escalate"
    # Wait for more information before deciding.
    DEFER = "defer"
class ConfidenceDistribution:
    """
    Probabilistic confidence representation.

    Instead of a single confidence score, this holds a set of sampled
    confidence values so that percentiles (and therefore uncertainty) can
    be reported alongside the mean.

    Args:
        mean: Central confidence value; clamped into [0.0, 1.0].
        std: Standard deviation used when samples are generated;
            clamped into [0.0, 0.5].
        samples: Optional pre-computed samples (any sized iterable of
            numbers, including a NumPy array). When omitted or empty,
            1000 samples are drawn from a normal distribution and clipped
            to [0, 1]; these draws are random, so percentiles vary
            between instances unless the NumPy global seed is fixed.
    """

    def __init__(self, mean: float, std: float = 0.05, samples: Optional[List[float]] = None):
        self.mean = max(0.0, min(mean, 1.0))
        self.std = max(0.0, min(std, 0.5))
        # An explicit None/length check is used instead of `samples or ...`
        # because the truth value of a multi-element NumPy array is
        # ambiguous and raises ValueError.
        if samples is None or len(samples) == 0:
            samples = np.random.normal(self.mean, self.std, 1000).clip(0, 1)
        # Store plain floats so the instance never aliases the caller's data.
        self._samples = [float(sample) for sample in samples]

    def _percentile(self, q: float) -> float:
        """Return the q-th percentile of the stored samples as a float."""
        return float(np.percentile(self._samples, q))

    @property
    def p5(self) -> float:
        """5th percentile (pessimistic)"""
        return self._percentile(5)

    @property
    def p50(self) -> float:
        """50th percentile (median)"""
        return self._percentile(50)

    @property
    def p95(self) -> float:
        """95th percentile (optimistic)"""
        return self._percentile(95)

    @property
    def confidence_interval(self) -> Tuple[float, float]:
        """
        Interval from the 5th to the 95th percentile.

        Note that (p5, p95) covers the central 90% of the samples.
        """
        return (self.p5, self.p95)

    def to_dict(self) -> Dict[str, float]:
        """Serialize the summary statistics (the raw samples are not included)"""
        return {
            "mean": self.mean,
            "std": self.std,
            "p5": self.p5,
            "p50": self.p50,
            "p95": self.p95,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, float]) -> "ConfidenceDistribution":
        """
        Deserialize from dictionary

        Only "mean" (required) and "std" (default 0.05) are read. Samples
        are drawn afresh by the constructor, so the percentiles of the
        rebuilt object will generally differ from any stored
        "p5"/"p50"/"p95" values.
        """
        return cls(
            mean=data["mean"],
            std=data.get("std", 0.05),
            samples=None,
        )

    def __repr__(self) -> str:
        # The "95% CI" label is kept unchanged for compatibility with
        # existing output; the bounds shown are p5 and p95.
        return f"ConfidenceDistribution(mean={self.mean:.3f}, 95% CI=[{self.p5:.3f}, {self.p95:.3f}])"
@dataclass(frozen=True, slots=True)
class HealingIntent:
"""
OSS-generated healing recommendation for Enterprise execution
Enhanced with:
- Probabilistic confidence distributions
- Risk score and cost projection integration
- Decision tree tracking for explainability
- Human override audit trail
- Partial execution support
- Integration with infrastructure governance module
- Full backward compatibility with old ARF patterns
This is the clean boundary between OSS intelligence and Enterprise execution:
- OSS creates HealingIntent through analysis (advisory only)
- Enterprise executes HealingIntent through execution gateway
- Immutable (frozen) to ensure consistency across OSS→Enterprise handoff
"""
# === CORE ACTION FIELDS (Sent to Enterprise) ===
action: str # Tool name, e.g., "restart_container", "provision_vm"
component: str # Target component or resource
parameters: Dict[str, Any] = field(default_factory=dict) # Action parameters
justification: str = "" # OSS reasoning chain
# === CONFIDENCE & METADATA ===
confidence: float = 0.85 # OSS confidence score (0.0 to 1.0)
confidence_distribution: Optional[Dict[str, float]] = None # Probabilistic confidence
incident_id: str = "" # Source incident identifier
detected_at: float = field(default_factory=time.time) # When OSS detected
# === RISK AND COST INTEGRATION ===
risk_score: Optional[float] = None # From risk engine (0-1)
risk_factors: Optional[Dict[str, float]] = None # Breakdown by factor
cost_projection: Optional[float] = None # Estimated cost impact
cost_confidence_interval: Optional[Tuple[float, float]] = None # 95% CI
recommended_action: Optional[RecommendedAction] = None # From risk engine
# === DECISION TRACKING ===
decision_tree: Optional[List[Dict[str, Any]]] = None # How decision was reached
alternative_actions: Optional[List[Dict[str, Any]]] = None # Alternatives considered
risk_profile: Optional[str] = None # Risk tolerance used (conservative/moderate/aggressive)
# === OSS ANALYSIS CONTEXT (Stays in OSS) ===
reasoning_chain: Optional[List[Dict[str, Any]]] = None
similar_incidents: Optional[List[Dict[str, Any]]] = None
rag_similarity_score: Optional[float] = None
source: IntentSource = IntentSource.OSS_ANALYSIS
# === IMMUTABLE IDENTIFIERS ===
intent_id: str = field(default_factory=lambda: f"intent_{uuid.uuid4().hex[:16]}")
created_at: float = field(default_factory=time.time)
# === EXECUTION METADATA (Set by Enterprise) ===
status: IntentStatus = IntentStatus.CREATED
execution_id: Optional[str] = None
executed_at: Optional[float] = None
execution_result: Optional[Dict[str, Any]] = None
enterprise_metadata: Dict[str, Any] = field(default_factory=dict)
# === HUMAN INTERACTION TRACKING ===
human_overrides: List[Dict[str, Any]] = field(default_factory=list) # Audit trail
approvals: List[Dict[str, Any]] = field(default_factory=list) # Who approved what
comments: List[Dict[str, Any]] = field(default_factory=list) # Human comments
# === OSS EDITION METADATA ===
oss_edition: str = OSS_EDITION
oss_license: str = OSS_LICENSE
requires_enterprise: bool = True # Always True for OSS-generated intents
execution_allowed: bool = EXECUTION_ALLOWED # From OSS constants
# === INFRASTRUCTURE INTEGRATION ===
infrastructure_intent_id: Optional[str] = None # Link to infrastructure intent if any
policy_violations: List[str] = field(default_factory=list) # From policy engine
infrastructure_intent: Optional[Dict[str, Any]] = None # Original infrastructure intent
# Class constants for validation
MIN_CONFIDENCE: ClassVar[float] = 0.0
MAX_CONFIDENCE: ClassVar[float] = 1.0
MAX_JUSTIFICATION_LENGTH: ClassVar[int] = 5000
MAX_PARAMETERS_SIZE: ClassVar[int] = 100
MAX_SIMILAR_INCIDENTS: ClassVar[int] = MAX_SIMILARITY_CACHE
VERSION: ClassVar[str] = "2.0.0" # Major bump for probabilistic features
def __post_init__(self) -> None:
    """Run the OSS-boundary checks, then the risk/cost consistency checks."""
    for check in (self._validate_oss_boundaries, self._validate_risk_integration):
        check()
def _validate_oss_boundaries(self) -> None:
    """
    Check the intent's fields against the OSS limits.

    Every problem is collected first so that a single exception can list
    all of them at once.

    Raises:
        ValidationError: if at least one check fails
    """
    problems: List[str] = []

    # Confidence range. The chained comparison is negated as a whole so
    # that a NaN confidence is rejected too.
    confidence_ok = self.MIN_CONFIDENCE <= self.confidence <= self.MAX_CONFIDENCE
    if not confidence_ok:
        problems.append(
            f"Confidence must be between {self.MIN_CONFIDENCE} and {self.MAX_CONFIDENCE}, got {self.confidence}"
        )

    # Justification length
    if len(self.justification) > self.MAX_JUSTIFICATION_LENGTH:
        problems.append(f"Justification exceeds max length {self.MAX_JUSTIFICATION_LENGTH}")

    # Action and component must contain something other than whitespace
    for label, text in (("Action", self.action), ("Component", self.component)):
        if not text or not text.strip():
            problems.append(f"{label} cannot be empty")

    # Parameter count
    parameter_count = len(self.parameters)
    if parameter_count > self.MAX_PARAMETERS_SIZE:
        problems.append(f"Too many parameters: {parameter_count} > {self.MAX_PARAMETERS_SIZE}")

    # Parameters have to survive JSON encoding
    try:
        json.dumps(self.parameters)
    except (TypeError, ValueError) as exc:
        problems.append(f"Parameters must be JSON serializable: {exc}")

    # Similar-incident count
    if self.similar_incidents and len(self.similar_incidents) > self.MAX_SIMILAR_INCIDENTS:
        problems.append(
            f"Too many similar incidents: {len(self.similar_incidents)} > {self.MAX_SIMILAR_INCIDENTS}"
        )

    # Restrictions that only apply when the intent carries the OSS edition
    if self.oss_edition == OSS_EDITION:
        edition_rules = (
            (self.execution_allowed, "Execution not allowed in OSS edition"),
            (self.status == IntentStatus.EXECUTING, "EXECUTING status not allowed in OSS edition"),
            (self.executed_at is not None, "executed_at should not be set in OSS edition"),
            (self.execution_id is not None, "execution_id should not be set in OSS edition"),
        )
        problems.extend(message for violated, message in edition_rules if violated)

    if problems:
        bullet_list = "\n".join(f" • {problem}" for problem in problems)
        raise ValidationError("HealingIntent validation failed:\n" + bullet_list)
def _validate_risk_integration(self) -> None:
    """
    Check the risk and cost fields for internally consistent values.

    Raises:
        ValidationError: if the risk score is outside [0, 1], the cost
            projection is negative, or the cost interval's lower bound
            exceeds its upper bound
    """
    score = self.risk_score
    # Negating the chained comparison also rejects NaN.
    if score is not None and not (0.0 <= score <= 1.0):
        raise ValidationError(f"Risk score must be between 0 and 1, got {score}")

    projected_cost = self.cost_projection
    if projected_cost is not None and projected_cost < 0:
        raise ValidationError(f"Cost projection cannot be negative, got {projected_cost}")

    interval = self.cost_confidence_interval
    if interval is None:
        return
    low, high = interval
    if low > high:
        raise ValidationError(f"Invalid confidence interval: [{low}, {high}]")
@property
def deterministic_id(self) -> str:
    """
    Content-derived ID used for idempotency.

    The ID is "intent_" followed by the first 16 hex characters of the
    SHA-256 of a key-sorted JSON document containing: action, component,
    normalized parameters, incident_id, detected_at truncated to whole
    seconds, and oss_edition. Two intents that agree on all of these
    produce the same ID.
    """
    fingerprint_source = json.dumps(
        {
            "oss_edition": self.oss_edition,
            "detected_at": int(self.detected_at),
            "incident_id": self.incident_id,
            "parameters": self._normalize_parameters(self.parameters),
            "component": self.component,
            "action": self.action,
        },
        # sort_keys makes the encoding independent of dict ordering
        sort_keys=True,
        default=str,
    )
    digest = hashlib.sha256(fingerprint_source.encode()).hexdigest()
    return "intent_" + digest[:16]
@property
def age_seconds(self) -> float:
    """Seconds elapsed between created_at and the current wall-clock time"""
    now = time.time()
    return now - self.created_at
@property
def is_executable(self) -> bool:
    """
    Whether the intent may be handed to an executor.

    Always False for intents carrying the OSS edition; otherwise True
    only while the status is CREATED, PENDING_EXECUTION or APPROVED.
    """
    executable_states = (
        IntentStatus.CREATED,
        IntentStatus.PENDING_EXECUTION,
        IntentStatus.APPROVED,
    )
    return self.oss_edition != OSS_EDITION and self.status in executable_states
@property
def is_oss_advisory(self) -> bool:
    """True when the intent carries the OSS edition and execution is disallowed"""
    if self.execution_allowed:
        return False
    return self.oss_edition == OSS_EDITION
@property
def requires_enterprise_upgrade(self) -> bool:
    """True when the intent needs Enterprise and was produced by the OSS edition"""
    needs_enterprise = self.requires_enterprise
    return needs_enterprise and self.oss_edition == OSS_EDITION
@property
def confidence_interval(self) -> Optional[Tuple[float, float]]:
    """
    (p5, p95) taken from confidence_distribution, or None without one.

    A missing "p5" or "p95" entry falls back to the scalar confidence.
    """
    distribution = self.confidence_distribution
    if not distribution:
        return None
    fallback = self.confidence
    lower = distribution.get("p5", fallback)
    upper = distribution.get("p95", fallback)
    return (lower, upper)
def to_enterprise_request(self) -> Dict[str, Any]:
    """
    Build the payload for the Enterprise API.

    The payload carries the action to perform, scalar analysis results,
    edition information, a small "oss_metadata" summary (counts and flags
    rather than the reasoning chain or similar incidents themselves), and
    upgrade information. The deterministic ID is sent as "intent_id".
    """
    recommended = self.recommended_action
    source_name = self.source.value

    # Core execution fields
    execution_fields = {
        "intent_id": self.deterministic_id,
        "action": self.action,
        "component": self.component,
        "parameters": self.parameters,
        "justification": self.justification,
    }

    # OSS metadata for Enterprise context
    analysis_fields = {
        "confidence": self.confidence,
        "confidence_interval": self.confidence_interval,
        "risk_score": self.risk_score,
        "cost_projection": self.cost_projection,
        "incident_id": self.incident_id,
        "detected_at": self.detected_at,
        "created_at": self.created_at,
        "source": source_name,
        "recommended_action": recommended.value if recommended else None,
    }

    # OSS edition information
    edition_fields = {
        "oss_edition": self.oss_edition,
        "oss_license": self.oss_license,
        "requires_enterprise": self.requires_enterprise,
        "execution_allowed": self.execution_allowed,
        "version": self.VERSION,
    }

    # Minimal OSS context (for debugging only)
    oss_metadata = {
        "similar_incidents_count": len(self.similar_incidents) if self.similar_incidents else 0,
        "rag_similarity_score": self.rag_similarity_score,
        "has_reasoning_chain": self.reasoning_chain is not None,
        "source": source_name,
        "is_oss_advisory": self.is_oss_advisory,
        "risk_factors": self.risk_factors,
        "policy_violations_count": len(self.policy_violations) if self.policy_violations else 0,
        "confidence_basis": self._get_confidence_basis(),
        "learning_applied": False,
        "learning_reason": "OSS advisory mode does not persist or learn from outcomes",
    }

    # Upgrade information
    enterprise_features = [
        "autonomous_execution",
        "approval_workflows",
        "persistent_storage",
        "learning_engine",
        "audit_trails",
        "compliance_reports",
        "multi_tenant_support",
        "sso_integration",
        "24_7_support",
        "probabilistic_confidence",
        "risk_analytics",
        "cost_optimization",
    ]

    return {
        **execution_fields,
        **analysis_fields,
        **edition_fields,
        "oss_metadata": oss_metadata,
        "upgrade_url": ENTERPRISE_UPGRADE_URL,
        "enterprise_features": enterprise_features,
    }
def _get_confidence_basis(self) -> str:
    """
    Name the evidence behind the confidence value.

    Checked in order: a DENY recommendation with policy violations, a RAG
    similarity above SIMILARITY_THRESHOLD, the presence of a risk score,
    and finally the "policy_only" default.
    """
    denied = self.recommended_action == RecommendedAction.DENY
    if denied and self.policy_violations:
        return "policy_violation"

    similarity = self.rag_similarity_score
    if similarity and similarity > SIMILARITY_THRESHOLD:
        return "historical_similarity"

    return "policy_only" if self.risk_score is None else "risk_based"
def to_dict(self, include_oss_context: bool = False) -> Dict[str, Any]:
    """
    Serialize the intent into a plain dictionary

    Args:
        include_oss_context: Keep the OSS analysis context (reasoning chain,
            similar incidents, RAG score, decision tree, alternative
            actions, infrastructure intent). Leave False when the result
            is sent to Enterprise.
    Returns:
        All dataclass fields, with the enum fields replaced by their string
        values, plus the computed properties and the schema version
    """
    payload = asdict(self)

    # Replace enum members with their string values
    enum_fields = (
        ("source", IntentSource),
        ("status", IntentStatus),
        ("recommended_action", RecommendedAction),
    )
    for key, enum_type in enum_fields:
        member = payload.get(key)
        if isinstance(member, enum_type):
            payload[key] = member.value

    # Strip the OSS-only analysis context unless it was requested
    if not include_oss_context:
        for oss_only_key in (
            "reasoning_chain",
            "similar_incidents",
            "rag_similarity_score",
            "decision_tree",
            "alternative_actions",
            "infrastructure_intent",
        ):
            payload.pop(oss_only_key, None)

    # Computed properties
    payload.update(
        {
            "deterministic_id": self.deterministic_id,
            "age_seconds": self.age_seconds,
            "is_executable": self.is_executable,
            "is_oss_advisory": self.is_oss_advisory,
            "requires_enterprise_upgrade": self.requires_enterprise_upgrade,
            "version": self.VERSION,
            "confidence_interval": self.confidence_interval,
        }
    )
    return payload
def with_execution_result(
    self,
    execution_id: str,
    executed_at: float,
    result: Dict[str, Any],
    status: IntentStatus = IntentStatus.COMPLETED,
    metadata: Optional[Dict[str, Any]] = None
) -> "HealingIntent":
    """
    Return a copy of this intent with execution results recorded (used by Enterprise)

    The intent is frozen, so the update is expressed as a new instance.
    Every field not listed below is carried over unchanged, and the new
    instance goes through the normal __post_init__ validation.

    Args:
        execution_id: Identifier of the execution
        executed_at: Execution timestamp (seconds since the epoch)
        result: Execution outcome stored as execution_result
        status: Status for the new intent (COMPLETED by default)
        metadata: Extra entries merged over the existing
            enterprise_metadata; on key clashes the new value wins
    Returns:
        A new intent carrying the execution fields
    Raises:
        ValidationError: if the resulting intent fails validation
    """
    # Function-scope import keeps this method self-contained.
    from dataclasses import replace

    merged_metadata = {**(self.enterprise_metadata or {}), **(metadata or {})}
    # replace() copies every field automatically, so fields added to the
    # dataclass later cannot be dropped by accident here.
    return replace(
        self,
        status=status,
        execution_id=execution_id,
        executed_at=executed_at,
        execution_result=result,
        enterprise_metadata=merged_metadata,
    )
def with_human_approval(
    self,
    approver: str,
    approval_time: float,
    comments: Optional[str] = None,
    overrides: Optional[Dict[str, Any]] = None
) -> "HealingIntent":
    """
    Record a human approval, optionally with overrides.

    The audit lists of the current intent are copied before being
    extended, so this instance is left untouched. Every other field is
    carried over unchanged.

    Args:
        approver: Identity of the approving person
        approval_time: Approval timestamp (seconds since the epoch)
        comments: Optional free-text comment; also used as the override
            reason when overrides are given
        overrides: Optional overrides supplied by the approver
    Returns:
        A new intent with the approval appended and status set to
        APPROVED_WITH_OVERRIDES when overrides are given, APPROVED otherwise
    Raises:
        ValidationError: if the resulting intent fails validation
    """
    # Function-scope import keeps this method self-contained.
    from dataclasses import replace

    updated_overrides = list(self.human_overrides)
    if overrides:
        updated_overrides.append({
            "overrider": approver,
            "timestamp": approval_time,
            "overrides": overrides,
            "reason": comments
        })

    updated_approvals = list(self.approvals)
    updated_approvals.append({
        "approver": approver,
        "timestamp": approval_time,
        "comments": comments,
        "overrides": overrides
    })

    updated_comments = list(self.comments)
    if comments:
        updated_comments.append({
            "author": approver,
            "timestamp": approval_time,
            "comment": comments
        })

    # replace() copies every remaining field automatically, so fields added
    # to the dataclass later cannot be dropped by accident here.
    return replace(
        self,
        status=IntentStatus.APPROVED_WITH_OVERRIDES if overrides else IntentStatus.APPROVED,
        human_overrides=updated_overrides,
        approvals=updated_approvals,
        comments=updated_comments,
    )
def mark_as_sent_to_enterprise(self) -> "HealingIntent":
    """
    Mark intent as sent to Enterprise (used by OSS)

    Returns:
        A new intent identical to this one except that its status is
        PENDING_EXECUTION
    Raises:
        ValidationError: if the resulting intent fails validation
    """
    # Function-scope import keeps this method self-contained.
    from dataclasses import replace

    # replace() copies every other field automatically, so fields added to
    # the dataclass later cannot be dropped by accident here.
    return replace(self, status=IntentStatus.PENDING_EXECUTION)
def mark_as_oss_advisory(self) -> "HealingIntent":
    """
    Mark intent as OSS advisory only

    Returns:
        A new intent identical to this one except that its status is
        OSS_ADVISORY_ONLY and execution_allowed is forced to False
    Raises:
        ValidationError: if the resulting intent fails validation
    """
    # Function-scope import keeps this method self-contained.
    from dataclasses import replace

    # replace() copies every other field automatically, so fields added to
    # the dataclass later cannot be dropped by accident here.
    return replace(
        self,
        status=IntentStatus.OSS_ADVISORY_ONLY,
        execution_allowed=False,  # Force no execution in OSS
    )
@classmethod
def from_infrastructure_intent(
    cls,
    infrastructure_intent: Any,  # InfrastructureIntent type
    action: str,
    component: str,
    parameters: Dict[str, Any],
    justification: str,
    confidence: float = 0.85,
    risk_score: Optional[float] = None,
    risk_factors: Optional[Dict[str, float]] = None,
    cost_projection: Optional[float] = None,
    policy_violations: Optional[List[str]] = None,
    recommended_action: Optional[RecommendedAction] = None,
    source: IntentSource = IntentSource.INFRASTRUCTURE_ANALYSIS
) -> "HealingIntent":
    """
    Build a HealingIntent out of an infrastructure-module analysis.

    The infrastructure intent is stored as a dictionary snapshot obtained
    from its model_dump() method when present, else from to_dict(), else
    reduced to a record of its type. Its intent_id attribute, when it has
    one, is stored as infrastructure_intent_id. The resulting intent is
    always OSS edition, requires Enterprise and disallows execution.
    """
    # Prefer model_dump(), then to_dict(); otherwise only record the type
    for exporter_name in ("model_dump", "to_dict"):
        if hasattr(infrastructure_intent, exporter_name):
            snapshot = getattr(infrastructure_intent, exporter_name)()
            break
    else:
        snapshot = {"type": str(type(infrastructure_intent))}

    linked_id = getattr(infrastructure_intent, 'intent_id', None)
    violations = policy_violations or []

    return cls(
        action=action,
        component=component,
        parameters=parameters,
        justification=justification,
        confidence=confidence,
        risk_score=risk_score,
        risk_factors=risk_factors,
        cost_projection=cost_projection,
        recommended_action=recommended_action,
        source=source,
        policy_violations=violations,
        infrastructure_intent_id=linked_id,
        infrastructure_intent=snapshot,
        oss_edition=OSS_EDITION,
        requires_enterprise=True,
        execution_allowed=False,
    )
@classmethod
def from_analysis(
    cls,
    action: str,
    component: str,
    parameters: Dict[str, Any],
    justification: str,
    confidence: float,
    confidence_std: float = 0.05,
    similar_incidents: Optional[List[Dict[str, Any]]] = None,
    reasoning_chain: Optional[List[Dict[str, Any]]] = None,
    incident_id: str = "",
    source: IntentSource = IntentSource.OSS_ANALYSIS,
    rag_similarity_score: Optional[float] = None,
    risk_score: Optional[float] = None,
    cost_projection: Optional[float] = None,
) -> "HealingIntent":
    """
    Factory method for creating HealingIntent from OSS analysis

    Steps:
    1. Truncate similar_incidents to MAX_SIMILAR_INCIDENTS.
    2. Boost the confidence by up to 20% based on the mean "similarity"
       of the similar incidents, capped at MAX_CONFIDENCE.
    3. Build the confidence distribution around the boosted confidence,
       so the stored interval describes the stored confidence value.
    4. When no RAG score is supplied, derive one from the first three
       similar incidents.

    Args:
        confidence: Base confidence before the similarity boost
        confidence_std: Standard deviation for the confidence distribution
        similar_incidents: Historical incidents; entries may carry a
            "similarity" key
        rag_similarity_score: Explicit RAG score; takes precedence over
            the derived one
    Returns:
        An OSS-edition intent that requires Enterprise and disallows execution
    Raises:
        ValidationError: if the resulting intent fails validation
    """
    # Apply OSS limits to similar incidents
    if similar_incidents and len(similar_incidents) > cls.MAX_SIMILAR_INCIDENTS:
        similar_incidents = similar_incidents[:cls.MAX_SIMILAR_INCIDENTS]

    # Calculate enhanced confidence based on similar incidents
    enhanced_confidence = confidence
    if similar_incidents:
        similarity_scores = [
            inc.get("similarity", 0.0)
            for inc in similar_incidents
            if "similarity" in inc
        ]
        if similarity_scores:
            avg_similarity = sum(similarity_scores) / len(similarity_scores)
            # Cap the boost to prevent overconfidence
            confidence_boost = min(0.2, avg_similarity * 0.3)
            enhanced_confidence = min(confidence * (1.0 + confidence_boost), cls.MAX_CONFIDENCE)

    # The distribution is centred on the boosted value; centring it on the
    # base value could leave the reported confidence outside its own interval.
    conf_dist = ConfidenceDistribution(enhanced_confidence, confidence_std)

    # Use provided RAG score or calculate from similar incidents
    final_rag_score = rag_similarity_score
    if final_rag_score is None and similar_incidents:
        # Average over the first three entries that carry a similarity.
        # NOTE(review): presumably callers pass incidents sorted by
        # similarity, which would make these the top three - confirm.
        leading_similarities = [
            inc.get("similarity", 0.0)
            for inc in similar_incidents[:3]
            if "similarity" in inc
        ]
        if leading_similarities:
            final_rag_score = sum(leading_similarities) / len(leading_similarities)

    return cls(
        action=action,
        component=component,
        parameters=parameters,
        justification=justification,
        confidence=enhanced_confidence,
        confidence_distribution=conf_dist.to_dict(),
        incident_id=incident_id,
        similar_incidents=similar_incidents,
        reasoning_chain=reasoning_chain,
        rag_similarity_score=final_rag_score,
        source=source,
        risk_score=risk_score,
        cost_projection=cost_projection,
        oss_edition=OSS_EDITION,
        requires_enterprise=True,
        execution_allowed=False,
    )
@classmethod
def from_rag_recommendation(
    cls,
    action: str,
    component: str,
    parameters: Dict[str, Any],
    rag_similarity_score: float,
    similar_incidents: List[Dict[str, Any]],
    justification_template: str = "Based on {count} similar historical incidents with {success_rate:.0%} success rate",
    success_rate: Optional[float] = None,
    risk_score: Optional[float] = None,
    cost_projection: Optional[float] = None,
) -> "HealingIntent":
    """
    Create HealingIntent from RAG graph recommendation

    Args:
        rag_similarity_score: Similarity reported by the RAG graph; scaled
            by 0.8 to form the base confidence
        similar_incidents: Non-empty list of historical incidents; the
            first entry's "incident_id" becomes the intent's incident_id
        justification_template: Format string that may use {count},
            {success_rate}, {action} and {component}
        success_rate: Fraction of successful incidents; when omitted it is
            the share of incidents whose "success" entry is truthy
    Returns:
        An intent built through from_analysis with source RAG_SIMILARITY
    Raises:
        ValidationError: if similar_incidents is empty or the resulting
            intent fails validation
    """
    if not similar_incidents:
        raise ValidationError("RAG recommendation requires similar incidents")

    # Calculate success rate if not provided (the list is non-empty here)
    if success_rate is None:
        successful = sum(1 for inc in similar_incidents if inc.get("success", False))
        success_rate = successful / len(similar_incidents)

    # Generate justification
    justification = justification_template.format(
        count=len(similar_incidents),
        success_rate=success_rate,
        action=action,
        component=component,
    )

    # Scale similarity to confidence, then weight by the success rate.
    # The weighting is applied unconditionally: skipping it for a rate of
    # exactly 0.0 would rank a 0% success history above a 1% one.
    base_confidence = rag_similarity_score * 0.8
    base_confidence = base_confidence * (0.7 + success_rate * 0.3)

    return cls.from_analysis(
        action=action,
        component=component,
        parameters=parameters,
        justification=justification,
        confidence=min(base_confidence, 0.95),  # Cap at 95%
        similar_incidents=similar_incidents,
        incident_id=similar_incidents[0].get("incident_id", ""),
        source=IntentSource.RAG_SIMILARITY,
        rag_similarity_score=rag_similarity_score,
        risk_score=risk_score,
        cost_projection=cost_projection,
    )
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "HealingIntent":
    """
    Create from dictionary (deserialize)

    Accepts the output of to_dict(), including after a JSON round trip:
    string enum values are converted back to enum members, a list-valued
    cost_confidence_interval is restored to a tuple, and the computed
    entries added by to_dict() (including "version", which is not
    otherwise interpreted) are dropped. The input is not mutated.

    Args:
        data: Dictionary of constructor fields, optionally with the
            computed entries produced by to_dict()
    Returns:
        The reconstructed intent
    Raises:
        ValueError: if "source" or "status" is not a known enum value
        TypeError: if data contains keys that are not constructor fields
        ValidationError: if the resulting intent fails validation
    """
    # Create a copy to avoid mutating input
    clean_data = data.copy()

    # Convert string enums back to Enum instances
    if "source" in clean_data and isinstance(clean_data["source"], str):
        clean_data["source"] = IntentSource(clean_data["source"])
    if "status" in clean_data and isinstance(clean_data["status"], str):
        clean_data["status"] = IntentStatus(clean_data["status"])
    if "recommended_action" in clean_data and isinstance(clean_data["recommended_action"], str):
        try:
            clean_data["recommended_action"] = RecommendedAction(clean_data["recommended_action"])
        except ValueError:
            # An unknown recommendation is treated as "no recommendation"
            clean_data["recommended_action"] = None

    # JSON has no tuple type, so the interval comes back as a list
    interval = clean_data.get("cost_confidence_interval")
    if isinstance(interval, list):
        clean_data["cost_confidence_interval"] = tuple(interval)

    # Remove computed fields that shouldn't be in constructor
    for computed_key in (
        "deterministic_id",
        "age_seconds",
        "is_executable",
        "is_oss_advisory",
        "requires_enterprise_upgrade",
        "version",
        "confidence_interval",
    ):
        clean_data.pop(computed_key, None)

    return cls(**clean_data)
def _normalize_parameters(self, params: Dict[str, Any]) -> Dict[str, Any]:
"""
Normalize parameters for deterministic hashing
Ensures that parameter order and minor format differences
don't affect the deterministic ID.
"""
normalized: Dict[str, Any] = {}
for key, value in sorted(params.items()):
normalized[key] = self._normalize_value(value)
return normalized
def _normalize_value(self, value: Any) -> Any:
"""Normalize a single value for hashing"""
if isinstance(value, (int, float, str, bool, type(None))):
return value
elif isinstance(value, (list, tuple, set)):
# Convert all iterables to sorted tuples
normalized_items = tuple(
sorted(
self._normalize_value(v) for v in value
)
)
return normalized_items
elif isinstance(value, dict):
# Recursively normalize dicts
return self._normalize_parameters(value)
elif hasattr(value, '__dict__'):
# Handle objects with __dict__
return self._normalize_parameters(value.__dict__)
else:
# Convert to string representation for other types
try:
return str(value)
except Exception:
# Fallback for objects that can't be stringified
return f"<unserializable:{type(value).__name__}>"
def get_oss_context(self) -> Dict[str, Any]:
    """
    Collect the OSS-side analysis context of this intent

    The design note for this method states that the context is meant to
    stay inside the OSS environment (privacy and IP protection).

    Returns:
        Dict with the reasoning chain, similar incidents, RAG similarity
        score, decision tree, alternative actions, the detection and
        creation timestamps rendered as ISO-8601 strings, the source value,
        the OSS edition, the advisory flag and the infrastructure intent.
    """
    # Timestamps are stored as epoch seconds; fromtimestamp() renders them
    # as naive datetimes in the local timezone.
    detected_iso = datetime.fromtimestamp(self.detected_at).isoformat()
    created_iso = datetime.fromtimestamp(self.created_at).isoformat()

    context: Dict[str, Any] = dict(
        reasoning_chain=self.reasoning_chain,
        similar_incidents=self.similar_incidents,
        rag_similarity_score=self.rag_similarity_score,
        decision_tree=self.decision_tree,
        alternative_actions=self.alternative_actions,
        analysis_timestamp=detected_iso,
        source=self.source.value,
        created_at=created_iso,
        oss_edition=self.oss_edition,
        is_oss_advisory=self.is_oss_advisory,
        infrastructure_intent=self.infrastructure_intent,
    )
    return context
def get_execution_summary(self) -> Dict[str, Any]:
    """
    Build a compact summary of this intent (described as public information
    that is safe to share externally)

    The base entries are always present. Execution, RAG, similar-incident,
    approval and override entries are appended only when the corresponding
    attribute is truthy.

    Returns:
        Summary dict; timestamps are rendered as ISO-8601 strings
    """
    violations = self.policy_violations
    summary: Dict[str, Any] = {
        "intent_id": self.deterministic_id,
        "action": self.action,
        "component": self.component,
        "confidence": self.confidence,
        "confidence_interval": self.confidence_interval,
        "risk_score": self.risk_score,
        "cost_projection": self.cost_projection,
        "status": self.status.value,
        "created_at": datetime.fromtimestamp(self.created_at).isoformat(),
        "age_seconds": self.age_seconds,
        "oss_edition": self.oss_edition,
        "requires_enterprise": self.requires_enterprise,
        "is_oss_advisory": self.is_oss_advisory,
        "source": self.source.value,
        "policy_violations_count": len(violations) if violations else 0,
        "confidence_basis": self._get_confidence_basis(),
    }

    finished_at = self.executed_at
    if finished_at:
        summary.update(
            executed_at=datetime.fromtimestamp(finished_at).isoformat(),
            execution_duration_seconds=finished_at - self.created_at,
        )

    outcome = self.execution_result
    if outcome:
        summary.update(
            execution_success=outcome.get("success", False),
            execution_message=outcome.get("message", ""),
        )

    similarity = self.rag_similarity_score
    if similarity:
        summary["rag_similarity_score"] = similarity

    incidents = self.similar_incidents
    if incidents:
        summary["similar_incidents_count"] = len(incidents)

    approvals = self.approvals
    if approvals:
        summary["approvals_count"] = len(approvals)
        # Only approvals that carry a non-empty "approver" are listed
        approver_names = (entry.get("approver") for entry in approvals)
        summary["approved_by"] = [name for name in approver_names if name]

    overrides = self.human_overrides
    if overrides:
        summary["overrides_count"] = len(overrides)

    return summary
def is_immutable(self) -> bool:
    """
    Check whether attribute assignment on this instance is rejected

    The probe uses the regular ``setattr`` protocol. A frozen dataclass
    installs a ``__setattr__`` that raises ``FrozenInstanceError`` (a
    subclass of ``AttributeError``), so the probe fails on frozen
    instances. Calling ``object.__setattr__`` directly would bypass that
    guard, always succeed on a non-slotted instance, and leave the probe
    attribute behind.

    Returns:
        True if assigning an attribute is rejected, False otherwise
    """
    probe = '_test_immutable'
    try:
        setattr(self, probe, True)
    except (AttributeError, TypeError):
        return True
    # The write went through, so the instance is mutable. Remove the probe
    # so that the check itself leaves no trace on the object.
    try:
        delattr(self, probe)
    except (AttributeError, TypeError):
        pass
    return False
def __repr__(self) -> str:
    """
    Return a short, single-line description of the intent

    Shows the first 8 characters of the deterministic ID, the action, the
    component, the confidence and risk score with two decimals, and the
    status value. A missing risk score is rendered as ``N/A``.
    """
    # The conditional must be evaluated outside the replacement field:
    # everything after ':' in an f-string field is the format spec, so
    # writing the if/else there yields an invalid specifier and raises.
    if self.risk_score is None:
        risk_text = "N/A"
    else:
        risk_text = f"{self.risk_score:.2f}"
    return (
        f"HealingIntent("
        f"id={self.deterministic_id[:8]}..., "
        f"action={self.action}, "
        f"component={self.component}, "
        f"confidence={self.confidence:.2f}, "
        f"risk={risk_text}, "
        f"status={self.status.value}"
        f")"
    )
class HealingIntentSerializer:
    """
    Versioned serialization for HealingIntent
    Enhanced with:
    - Probabilistic confidence distribution support
    - Risk and cost field serialization
    - Backward compatibility with v1.x
    - OSS/Enterprise edition detection
    """

    SCHEMA_VERSION: ClassVar[str] = "2.0.0"

    # Versions accepted by deserialize()
    _SUPPORTED_VERSIONS: ClassVar[Tuple[str, ...]] = ("2.0.0", "1.1.0", "1.0.0")

    # Fields that exist only in the 2.0.0 schema. They are stripped when
    # writing a v1.x payload and filled with defaults when reading one.
    # Fields restored as None:
    _V2_NULLABLE_FIELDS: ClassVar[Tuple[str, ...]] = (
        "confidence_distribution",
        "risk_score",
        "risk_factors",
        "cost_projection",
        "cost_confidence_interval",
        "recommended_action",
        "decision_tree",
        "alternative_actions",
        "risk_profile",
        "infrastructure_intent_id",
        "infrastructure_intent",
    )
    # Fields restored as a fresh empty list:
    _V2_LIST_FIELDS: ClassVar[Tuple[str, ...]] = (
        "human_overrides",
        "approvals",
        "comments",
        "policy_violations",
    )

    @classmethod
    def serialize(cls, intent: HealingIntent, version: str = "2.0.0") -> Dict[str, Any]:
        """
        Serialize HealingIntent with versioning

        Args:
            intent: HealingIntent to serialize
            version: Schema version to use ("2.0.0", "1.1.0" or "1.0.0")

        Returns:
            Versioned serialization dictionary with "version",
            "schema_version", "data" and "metadata" entries

        Raises:
            SerializationError: If serialization fails or the version is
                not supported
        """
        try:
            if version == "2.0.0":
                return {
                    "version": version,
                    "schema_version": cls.SCHEMA_VERSION,
                    "data": intent.to_dict(include_oss_context=True),
                    "metadata": {
                        "serialized_at": time.time(),
                        "deterministic_id": intent.deterministic_id,
                        "is_executable": intent.is_executable,
                        "is_oss_advisory": intent.is_oss_advisory,
                        "requires_enterprise_upgrade": intent.requires_enterprise_upgrade,
                        "oss_edition": intent.oss_edition,
                        "has_probabilistic_confidence": intent.confidence_distribution is not None,
                        "has_risk_assessment": intent.risk_score is not None,
                        "has_cost_projection": intent.cost_projection is not None,
                    }
                }

            if version in ("1.1.0", "1.0.0"):
                # Backward compatibility with v1.x
                data = intent.to_dict(include_oss_context=True)

                # Remove v2.0.0 fields for compatibility
                for field_name in cls._V2_NULLABLE_FIELDS + cls._V2_LIST_FIELDS:
                    data.pop(field_name, None)

                # Statuses listed here are rewritten to PENDING_EXECUTION
                # in v1.x payloads
                v2_only_statuses = [
                    IntentStatus.EXECUTING_PARTIAL.value,
                    IntentStatus.COMPLETED_PARTIAL.value,
                    IntentStatus.ROLLED_BACK.value,
                    IntentStatus.PENDING_APPROVAL.value,
                    IntentStatus.APPROVED.value,
                    IntentStatus.APPROVED_WITH_OVERRIDES.value
                ]
                if data.get("status") in v2_only_statuses:
                    data["status"] = IntentStatus.PENDING_EXECUTION.value

                return {
                    "version": version,
                    # In this branch the version is already one of the two
                    # v1.x schema identifiers
                    "schema_version": version,
                    "data": data,
                    "metadata": {
                        "serialized_at": time.time(),
                        "deterministic_id": intent.deterministic_id,
                        "is_executable": intent.is_executable,
                        "is_oss_advisory": intent.is_oss_advisory,
                    }
                }

            raise SerializationError(f"Unsupported version: {version}")
        except Exception as e:
            raise SerializationError(f"Failed to serialize HealingIntent: {e}") from e

    @classmethod
    def deserialize(cls, data: Dict[str, Any]) -> HealingIntent:
        """
        Deserialize HealingIntent with version detection

        Accepts both the wrapped form produced by ``serialize`` (payload
        under "data") and a bare intent dict. A missing "version" is read
        as "1.0.0". The input mapping is not modified.

        Args:
            data: Serialized data

        Returns:
            Deserialized HealingIntent

        Raises:
            SerializationError: If deserialization fails
        """
        try:
            version = data.get("version", "1.0.0")
            # Work on a shallow copy: filling in v2 defaults below must not
            # write into the caller's payload.
            intent_data = dict(data.get("data", data))  # Handle both wrapped and unwrapped

            if version not in cls._SUPPORTED_VERSIONS:
                raise SerializationError(f"Unsupported version: {version}")

            if version.startswith("1."):
                # Add default values for v2 fields
                for field_name in cls._V2_NULLABLE_FIELDS:
                    intent_data.setdefault(field_name, None)
                for field_name in cls._V2_LIST_FIELDS:
                    intent_data.setdefault(field_name, [])

            return HealingIntent.from_dict(intent_data)
        except KeyError as e:
            raise SerializationError(f"Missing required field in serialized data: {e}") from e
        except Exception as e:
            raise SerializationError(f"Failed to deserialize HealingIntent: {e}") from e

    @classmethod
    def to_json(cls, intent: HealingIntent, pretty: bool = False) -> str:
        """
        Convert HealingIntent to JSON string

        Args:
            intent: HealingIntent to serialize (default schema version)
            pretty: Indent the output by two spaces when True

        Raises:
            SerializationError: If serialization or JSON encoding fails
        """
        try:
            serialized = cls.serialize(intent)
            # default=str stringifies any value json cannot encode natively
            return json.dumps(serialized, indent=2 if pretty else None, default=str)
        except Exception as e:
            raise SerializationError(f"Failed to convert to JSON: {e}") from e

    @classmethod
    def from_json(cls, json_str: str) -> HealingIntent:
        """
        Create HealingIntent from JSON string

        Raises:
            SerializationError: If the text is not valid JSON or the
                decoded payload cannot be deserialized
        """
        try:
            data = json.loads(json_str)
            return cls.deserialize(data)
        except json.JSONDecodeError as e:
            raise SerializationError(f"Invalid JSON: {e}") from e
        except Exception as e:
            raise SerializationError(f"Failed to parse JSON: {e}") from e

    @classmethod
    def to_enterprise_json(cls, intent: HealingIntent) -> str:
        """
        Convert to Enterprise-ready JSON (excludes OSS context)
        This is what should be sent to the Enterprise API

        The payload is whatever ``intent.to_enterprise_request()`` returns,
        encoded with ``default=str``.

        Raises:
            SerializationError: If building or encoding the request fails
        """
        try:
            enterprise_request = intent.to_enterprise_request()
            return json.dumps(enterprise_request, default=str)
        except Exception as e:
            raise SerializationError(f"Failed to create Enterprise JSON: {e}") from e

    @classmethod
    def validate_for_oss(cls, intent: HealingIntent) -> bool:
        """
        Validate that HealingIntent complies with OSS boundaries

        Checks, in order: the edition equals OSS_EDITION, execution is not
        allowed, the number of similar incidents does not exceed
        HealingIntent.MAX_SIMILAR_INCIDENTS, ``intent.is_immutable()``
        holds, and neither ``executed_at`` nor ``execution_id`` is set.
        Any exception raised while checking counts as a failed validation.

        Returns:
            True if intent is valid for OSS edition
        """
        try:
            # Check OSS edition
            if intent.oss_edition != OSS_EDITION:
                return False

            # Check execution restrictions
            if intent.execution_allowed:
                return False

            # Check similar incidents limit
            if intent.similar_incidents and len(intent.similar_incidents) > HealingIntent.MAX_SIMILAR_INCIDENTS:
                return False

            # Check that frozen dataclass property is preserved
            if not intent.is_immutable():
                return False

            # Check that no execution fields are set
            if intent.executed_at is not None or intent.execution_id is not None:
                return False

            return True
        except Exception:
            return False
# Factory functions for common use cases
def create_infrastructure_healing_intent(
    infrastructure_result: Any,  # HealingIntent from infrastructure module
    action_mapping: Optional[Dict[str, str]] = None
) -> HealingIntent:
    """
    Create a healing intent from infrastructure module analysis result.
    This bridges the infrastructure governance module with the main healing system.

    All fields are read from ``infrastructure_result`` with ``getattr`` and
    a default, so attributes that are absent fall back to neutral values.

    Args:
        infrastructure_result: The HealingIntent from infrastructure.evaluate()
        action_mapping: Optional mapping from infrastructure actions to healing
            actions; defaults to approve->execute, deny->block,
            escalate->escalate, defer->defer

    Returns:
        HealingIntent marked as OSS advisory
    """
    mapping = action_mapping
    if mapping is None:
        mapping = {
            "approve": "execute",
            "deny": "block",
            "escalate": "escalate",
            "defer": "defer"
        }

    # Anything without a usable ``.value`` (or with an unmapped one) becomes "review"
    recommended_action = getattr(infrastructure_result, 'recommended_action', None)
    action = "review"
    if recommended_action and hasattr(recommended_action, 'value'):
        action = mapping.get(recommended_action.value, "review")

    parameters = {
        "infrastructure_intent_id": getattr(infrastructure_result, 'intent_id', None),
        "risk_score": getattr(infrastructure_result, 'risk_score', None),
        "cost_projection": getattr(infrastructure_result, 'cost_projection', None),
        "policy_violations": getattr(infrastructure_result, 'policy_violations', []),
        "evaluation_details": getattr(infrastructure_result, 'evaluation_details', {})
    }

    # Justification text: the result's own justification, followed by the
    # policy violations when there are any
    pieces = [
        getattr(infrastructure_result, 'justification', "Infrastructure analysis completed"),
    ]
    policy_violations = getattr(infrastructure_result, 'policy_violations', [])
    if policy_violations:
        pieces.append(f"Policy violations: {'; '.join(policy_violations)}")

    intent = HealingIntent.from_infrastructure_intent(
        infrastructure_intent=getattr(infrastructure_result, 'infrastructure_intent', None),
        action=action,
        component="infrastructure",
        parameters=parameters,
        justification=" ".join(pieces),
        confidence=getattr(infrastructure_result, 'confidence_score', 0.85),
        risk_score=getattr(infrastructure_result, 'risk_score', None),
        policy_violations=policy_violations,
        recommended_action=recommended_action,
        source=IntentSource.INFRASTRUCTURE_ANALYSIS
    )
    return intent.mark_as_oss_advisory()
def create_rollback_intent(
    component: str,
    revision: str = "previous",
    justification: str = "",
    incident_id: str = "",
    similar_incidents: Optional[List[Dict[str, Any]]] = None,
    rag_similarity_score: Optional[float] = None,
    risk_score: Optional[float] = None,
    cost_projection: Optional[float] = None,
) -> HealingIntent:
    """
    Create a rollback healing intent with OSS limits

    Builds a "rollback" intent through ``HealingIntent.from_analysis`` with
    a fixed confidence of 0.9 and marks it as OSS advisory. An empty
    ``justification`` is replaced by a generated sentence naming the
    component and revision.
    """
    reason = justification or f"Rollback {component} to {revision} revision"
    draft = HealingIntent.from_analysis(
        action="rollback",
        component=component,
        parameters={"revision": revision},
        justification=reason,
        confidence=0.9,
        similar_incidents=similar_incidents,
        incident_id=incident_id,
        rag_similarity_score=rag_similarity_score,
        risk_score=risk_score,
        cost_projection=cost_projection,
    )
    return draft.mark_as_oss_advisory()
def create_restart_intent(
    component: str,
    container_id: Optional[str] = None,
    justification: str = "",
    incident_id: str = "",
    similar_incidents: Optional[List[Dict[str, Any]]] = None,
    rag_similarity_score: Optional[float] = None,
    risk_score: Optional[float] = None,
    cost_projection: Optional[float] = None,
) -> HealingIntent:
    """
    Create a container restart healing intent with OSS limits

    Builds a "restart_container" intent through
    ``HealingIntent.from_analysis`` with a fixed confidence of 0.85 and
    marks it as OSS advisory. ``container_id`` is added to the parameters
    only when it is truthy; an empty ``justification`` is replaced by a
    generated sentence naming the component.
    """
    parameters = {"container_id": container_id} if container_id else {}
    reason = justification or f"Restart container for {component}"
    draft = HealingIntent.from_analysis(
        action="restart_container",
        component=component,
        parameters=parameters,
        justification=reason,
        confidence=0.85,
        similar_incidents=similar_incidents,
        incident_id=incident_id,
        rag_similarity_score=rag_similarity_score,
        risk_score=risk_score,
        cost_projection=cost_projection,
    )
    return draft.mark_as_oss_advisory()
def create_scale_out_intent(
    component: str,
    scale_factor: int = 2,
    justification: str = "",
    incident_id: str = "",
    similar_incidents: Optional[List[Dict[str, Any]]] = None,
    rag_similarity_score: Optional[float] = None,
    risk_score: Optional[float] = None,
    cost_projection: Optional[float] = None,
) -> HealingIntent:
    """
    Create a scale-out healing intent with OSS limits

    Builds a "scale_out" intent through ``HealingIntent.from_analysis``
    with a fixed confidence of 0.8 and marks it as OSS advisory. An empty
    ``justification`` is replaced by a generated sentence naming the
    component and scale factor.
    """
    reason = justification or f"Scale out {component} by factor {scale_factor}"
    draft = HealingIntent.from_analysis(
        action="scale_out",
        component=component,
        parameters={"scale_factor": scale_factor},
        justification=reason,
        confidence=0.8,
        similar_incidents=similar_incidents,
        incident_id=incident_id,
        rag_similarity_score=rag_similarity_score,
        risk_score=risk_score,
        cost_projection=cost_projection,
    )
    return draft.mark_as_oss_advisory()
def create_oss_advisory_intent(
    action: str,
    component: str,
    parameters: Dict[str, Any],
    justification: str,
    confidence: float = 0.85,
    incident_id: str = "",
    risk_score: Optional[float] = None,
    cost_projection: Optional[float] = None,
) -> HealingIntent:
    """
    Create a generic OSS advisory-only intent
    Used when OSS wants to recommend an action without execution capability

    The intent is constructed directly with the OSS edition, with
    ``requires_enterprise=True``, ``execution_allowed=False`` and the
    ``OSS_ADVISORY_ONLY`` status.
    """
    # Values supplied by the caller
    fields: Dict[str, Any] = dict(
        action=action,
        component=component,
        parameters=parameters,
        justification=justification,
        confidence=confidence,
        incident_id=incident_id,
        risk_score=risk_score,
        cost_projection=cost_projection,
    )
    # Fixed advisory-only settings
    fields.update(
        oss_edition=OSS_EDITION,
        requires_enterprise=True,
        execution_allowed=False,
        status=IntentStatus.OSS_ADVISORY_ONLY,
    )
    return HealingIntent(**fields)
# Public API of this module: the names bound by `from <module> import *`.
# Kept as a literal list of strings, grouped by kind.
__all__ = [
    # Main class
    "HealingIntent",
    # Supporting classes
    "ConfidenceDistribution",
    "HealingIntentSerializer",
    # Enums
    "IntentSource",
    "IntentStatus",
    "RecommendedAction",
    # Exceptions
    "HealingIntentError",
    "SerializationError",
    "ValidationError",
    # Factory functions (module-level create_* helpers)
    "create_infrastructure_healing_intent",
    "create_rollback_intent",
    "create_restart_intent",
    "create_scale_out_intent",
    "create_oss_advisory_intent",
]