| """ |
| ARF OSS v3.3.9 - Enterprise Reliability Engine (Backend API only) |
| With integrated Infrastructure Governance Module |
| """ |
|
|
| import os |
| import sys |
| import json |
| import uuid |
| import hashlib |
| import logging |
| import sqlite3 |
| import requests |
| from contextlib import contextmanager |
| from datetime import datetime |
| from enum import Enum |
| from typing import Dict, List, Optional, Any, Tuple |
|
|
| import yaml |
| from fastapi import FastAPI, HTTPException, Depends, status |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials |
| from pydantic import BaseModel, Field, field_validator |
| from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
| |
| from infrastructure import ( |
| AzureInfrastructureSimulator, |
| RegionAllowedPolicy, |
| CostThresholdPolicy, |
| ProvisionResourceIntent, |
| DeployConfigurationIntent, |
| GrantAccessIntent, |
| ResourceType, |
| Environment, |
| RecommendedAction, |
| ) |
|
|
| |
| from hmc_learner import train_hmc_model |
|
|
| |
class Settings(BaseSettings):
    """Runtime configuration for the service.

    Values come from keyword arguments or the environment; fields declared
    with an ``alias`` are looked up under that alias. Instantiating the class
    also makes sure ``data_dir`` exists on disk.
    """

    model_config = SettingsConfigDict(
        populate_by_name=True,
        extra='ignore',
        env_prefix='',
        case_sensitive=False,
    )

    # Hosting identity / credentials.
    hf_space_id: str = Field(default='local', alias='SPACE_ID')
    hf_token: str = Field(default='', alias='HF_TOKEN')

    # Storage root: '/data' when that directory exists at class-definition
    # time, otherwise a local './data' directory.
    data_dir: str = Field(
        default='/data' if os.path.exists('/data') else './data',
        alias='DATA_DIR',
    )

    # Sales / notification contact points.
    lead_email: str = "petter2025us@outlook.com"
    calendly_url: str = "https://calendly.com/petter2025us/arf-demo"
    slack_webhook: str = Field(default='', alias='SLACK_WEBHOOK')
    sendgrid_api_key: str = Field(default='', alias='SENDGRID_API_KEY')

    # Bearer token expected by the API; a random UUID is generated when no
    # value is supplied.
    api_key: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        alias='ARF_API_KEY',
    )

    # Defaults consumed by the policy engine.
    default_confidence_threshold: float = 0.9
    default_max_risk: str = "MEDIUM"

    def __init__(self, **kwargs):
        """Load the settings, then create ``data_dir`` if it is missing."""
        super().__init__(**kwargs)
        os.makedirs(self.data_dir, exist_ok=True)
|
|
# Process-wide configuration; constructing it also creates the data directory.
settings = Settings()

# Root logging: INFO and above goes to a log file inside the data directory
# and to a stream handler.
_log_handlers = [
    logging.FileHandler(f'{settings.data_dir}/arf.log'),
    logging.StreamHandler(),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)

# Logger shared by every component in this module.
logger = logging.getLogger('arf.oss')
|
|
| |
class RiskLevel(str, Enum):
    """Risk tiers, declared from least to most severe.

    Declaration order matters: the policy engine compares positions in
    ``list(RiskLevel)`` to decide whether one level exceeds another.
    """

    LOW = "LOW"
    MEDIUM = "MEDIUM"
    HIGH = "HIGH"
    CRITICAL = "CRITICAL"
|
|
class ExecutionLevel(str, Enum):
    """Execution tiers reported by the policy engine as ``required_level``."""

    AUTONOMOUS_LOW = "AUTONOMOUS_LOW"
    AUTONOMOUS_HIGH = "AUTONOMOUS_HIGH"
    SUPERVISED = "SUPERVISED"
    OPERATOR_REVIEW = "OPERATOR_REVIEW"
|
|
class LeadSignal(str, Enum):
    """Kinds of lead signal that can be written to the ``signals`` table."""

    HIGH_RISK_BLOCKED = "high_risk_blocked"
    NOVEL_ACTION = "novel_action"
    POLICY_VIOLATION = "policy_violation"
    CONFIDENCE_LOW = "confidence_low"
    REPEATED_FAILURE = "repeated_failure"
|
|
| |
class BayesianRiskEngine:
    """Beta-Binomial risk scorer backed by a SQLite evidence table.

    Every action category has a Beta(alpha, beta) prior. Outcomes recorded for
    an action (keyed by the SHA-256 hash of its text) update that prior; the
    posterior mean multiplied by a context factor is the risk score.

    NOTE(review): recorded *successes* are added to alpha, which raises the
    posterior mean and therefore the risk score. Confirm that this direction
    is intended.
    """

    # (category, keywords) pairs checked in order; the first category with a
    # keyword occurring as a substring of the lower-cased action text wins.
    _CATEGORY_KEYWORDS = (
        ('database', ('database', 'db', 'sql', 'table', 'drop', 'delete')),
        ('network', ('network', 'firewall', 'load balancer')),
        ('compute', ('pod', 'container', 'deploy', 'scale')),
        ('security', ('security', 'cert', 'key', 'access')),
    )

    def __init__(self):
        """Set up the priors and create the evidence table if needed."""
        self.prior_alpha = 2.0
        self.prior_beta = 5.0
        self.action_priors = {
            'database': {'alpha': 1.5, 'beta': 8.0},
            'network': {'alpha': 3.0, 'beta': 4.0},
            'compute': {'alpha': 4.0, 'beta': 3.0},
            'security': {'alpha': 2.0, 'beta': 6.0},
            'default': {'alpha': 2.0, 'beta': 5.0}
        }
        self.evidence_db = f"{settings.data_dir}/evidence.db"
        self._init_db()

    def _init_db(self):
        """Create the ``evidence`` table and its hash index.

        Raises:
            RuntimeError: if SQLite reports an error while creating them.
        """
        try:
            with self._get_db() as conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS evidence (
                        id TEXT PRIMARY KEY,
                        action_type TEXT,
                        action_hash TEXT,
                        success INTEGER,
                        total INTEGER,
                        timestamp TEXT,
                        metadata TEXT
                    )
                ''')
                conn.execute('CREATE INDEX IF NOT EXISTS idx_action_hash ON evidence(action_hash)')
        except sqlite3.Error as e:
            logger.error(f"Failed to initialize evidence database: {e}")
            raise RuntimeError("Could not initialize evidence storage") from e

    @contextmanager
    def _get_db(self):
        """Yield a connection to the evidence database and always close it.

        SQLite errors raised inside the ``with`` body are logged and re-raised.
        """
        conn = None
        try:
            conn = sqlite3.connect(self.evidence_db)
            yield conn
        except sqlite3.Error as e:
            logger.error(f"Database error: {e}")
            raise
        finally:
            if conn:
                conn.close()

    def classify_action(self, action_text: str) -> str:
        """Return the category of an action: database, network, compute,
        security, or ``'default'`` when no keyword matches."""
        action_lower = action_text.lower()
        for category, keywords in self._CATEGORY_KEYWORDS:
            if any(word in action_lower for word in keywords):
                return category
        return 'default'

    def get_prior(self, action_type: str) -> Tuple[float, float]:
        """Return ``(alpha, beta)`` for a category, or the default prior."""
        prior = self.action_priors.get(action_type, self.action_priors['default'])
        return prior['alpha'], prior['beta']

    def get_evidence(self, action_hash: str) -> Tuple[int, int]:
        """Return ``(successes, trials)`` summed over rows for this hash.

        Only the first 50 characters of the hash are used, matching what
        ``record_outcome`` stores. Returns ``(0, 0)`` on a database error.
        """
        try:
            with self._get_db() as conn:
                cursor = conn.execute(
                    'SELECT SUM(success), SUM(total) FROM evidence WHERE action_hash = ?',
                    (action_hash[:50],)
                )
                row = cursor.fetchone()
                # SUM over zero rows yields NULL, hence the "or 0".
                return (row[0] or 0, row[1] or 0) if row else (0, 0)
        except sqlite3.Error as e:
            logger.error(f"Failed to retrieve evidence: {e}")
            return (0, 0)

    def calculate_posterior(self, action_text: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Compute the posterior-based risk assessment for an action.

        Args:
            action_text: Free-text description of the proposed action.
            context: Context flags read by ``_context_likelihood``.

        Returns:
            A dict with the clamped ``score`` (0.01..0.99), the ``level``
            (a ``RiskLevel``), an approximate 95% interval around the posterior
            mean, the prior/posterior parameters, the evidence counts, the
            context multiplier and a human-readable ``calculation`` string.
        """
        action_type = self.classify_action(action_text)
        alpha0, beta0 = self.get_prior(action_type)
        action_hash = hashlib.sha256(action_text.encode()).hexdigest()
        successes, trials = self.get_evidence(action_hash)

        # Conjugate Beta update: successes go to alpha, failures to beta.
        alpha_n = alpha0 + successes
        beta_n = beta0 + (trials - successes)
        posterior_mean = alpha_n / (alpha_n + beta_n)

        context_multiplier = self._context_likelihood(context)
        risk_score = posterior_mean * context_multiplier
        risk_score = min(0.99, max(0.01, risk_score))

        # Normal approximation (mean +/- 1.96 sd) of the Beta posterior; the
        # interval is around the posterior mean, not the multiplied score.
        variance = (alpha_n * beta_n) / ((alpha_n + beta_n)**2 * (alpha_n + beta_n + 1))
        std_dev = variance ** 0.5
        ci_lower = max(0.01, posterior_mean - 1.96 * std_dev)
        ci_upper = min(0.99, posterior_mean + 1.96 * std_dev)

        if risk_score > 0.8:
            risk_level = RiskLevel.CRITICAL
        elif risk_score > 0.6:
            risk_level = RiskLevel.HIGH
        elif risk_score > 0.4:
            risk_level = RiskLevel.MEDIUM
        else:
            risk_level = RiskLevel.LOW

        return {
            "score": risk_score,
            "level": risk_level,
            "credible_interval": [ci_lower, ci_upper],
            "posterior_parameters": {"alpha": alpha_n, "beta": beta_n},
            "prior_used": {"alpha": alpha0, "beta": beta0, "type": action_type},
            "evidence_used": {"successes": successes, "trials": trials},
            "context_multiplier": context_multiplier,
            # Greek letters and the multiplication sign were mis-encoded in
            # this explanation text; restored here.
            "calculation": f"""
Posterior = Beta(α={alpha_n:.1f}, β={beta_n:.1f})
Mean = {alpha_n:.1f} / ({alpha_n:.1f} + {beta_n:.1f}) = {posterior_mean:.3f}
× Context multiplier {context_multiplier:.2f} = {risk_score:.3f}
"""
        }

    def _context_likelihood(self, context: Dict) -> float:
        """Return the multiplicative risk factor implied by the context.

        Reads ``environment``, ``user_role`` and ``backup_available`` from
        ``context`` and also the current local hour (before 06:00 or after
        22:59 increases the factor).
        """
        multiplier = 1.0
        if context.get('environment') == 'production':
            multiplier *= 1.5
        elif context.get('environment') == 'staging':
            multiplier *= 0.8
        hour = datetime.now().hour
        if hour < 6 or hour > 22:
            multiplier *= 1.3
        if context.get('user_role') == 'junior':
            multiplier *= 1.4
        elif context.get('user_role') == 'senior':
            multiplier *= 0.9
        if not context.get('backup_available', True):
            multiplier *= 1.6
        return multiplier

    def record_outcome(self, action_text: str, success: bool):
        """Append one observed outcome for an action to the evidence table.

        Database errors are logged and swallowed (best effort).
        """
        action_hash = hashlib.sha256(action_text.encode()).hexdigest()
        action_type = self.classify_action(action_text)
        try:
            with self._get_db() as conn:
                conn.execute('''
                    INSERT INTO evidence (id, action_type, action_hash, success, total, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (
                    str(uuid.uuid4()),
                    action_type,
                    action_hash[:50],
                    1 if success else 0,
                    1,
                    datetime.utcnow().isoformat()
                ))
                conn.commit()
                logger.info(f"Recorded outcome for {action_type}: success={success}")
        except sqlite3.Error as e:
            logger.error(f"Failed to record outcome: {e}")

    @staticmethod
    def _sigmoid(logit: float) -> float:
        """Logistic function that cannot overflow for large |logit|."""
        import math  # local import: the module header does not import math

        # exp() is only ever called on a non-positive argument.
        if logit >= 0:
            return 1.0 / (1.0 + math.exp(-logit))
        exp_logit = math.exp(logit)
        return exp_logit / (1.0 + exp_logit)

    @staticmethod
    def _category_code(cat_mapping: Dict, action_cat: str) -> int:
        """Return the integer code mapped to ``action_cat`` (0 if unknown).

        ``cat_mapping`` maps code -> category name. Codes read back from a
        JSON file are strings, so they are coerced to ``int`` here.
        """
        cat_to_code = {name: code for code, name in cat_mapping.items()}
        try:
            return int(cat_to_code.get(action_cat, 0))
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _category_intercept(means: Any, cat_code: int) -> float:
        """Pick the per-category intercept, tolerating short or scalar input."""
        if isinstance(means, (int, float)):
            return float(means)
        if 0 <= cat_code < len(means):
            return means[cat_code]
        # Unknown code: fall back to the first intercept, or 0.0 if none.
        return means[0] if means else 0.0

    def enhanced_risk(self, action_text: str, context: Dict, hmc_coeffs: Optional[Dict] = None) -> float:
        """Compute a risk probability from HMC logistic coefficients.

        Falls back to the plain posterior score when ``hmc_coeffs`` is None.

        Args:
            action_text: Free-text description of the proposed action.
            context: Context dict; ``environment``, ``user_role`` and
                ``confidence`` (default 0.85) are read here.
            hmc_coeffs: Model dict holding ``action_cat_mapping`` and one
                ``{'mean': ...}`` entry per coefficient.

        Returns:
            A probability in [0, 1].
        """
        if hmc_coeffs is None:
            return self.calculate_posterior(action_text, context)["score"]

        action_cat = self.classify_action(action_text)
        cat_code = self._category_code(hmc_coeffs.get("action_cat_mapping", {}), action_cat)

        env_prod = 1 if context.get('environment') == 'production' else 0
        role_junior = 1 if context.get('user_role') == 'junior' else 0
        hour = datetime.now().hour

        simple_risk = self.calculate_posterior(action_text, context)["score"]
        confidence = context.get('confidence', 0.85)

        # NOTE(review): the coefficient keys below look mis-encoded (probably
        # Greek alpha/beta originally). They are a data contract with the
        # model file, so they are left exactly as found; confirm against the
        # keys that the training module actually writes.
        intercept = self._category_intercept(
            hmc_coeffs.get('Ξ±_cat', {}).get('mean', [0]), cat_code
        )
        logit = (
            intercept +
            hmc_coeffs.get('Ξ²_env', {}).get('mean', 0) * env_prod +
            hmc_coeffs.get('Ξ²_role', {}).get('mean', 0) * role_junior +
            hmc_coeffs.get('Ξ²_risk', {}).get('mean', 0) * (simple_risk - 0.5) +
            hmc_coeffs.get('Ξ²_hour', {}).get('mean', 0) * ((hour - 12) / 12) +
            hmc_coeffs.get('Ξ²_conf', {}).get('mean', 0) * (confidence - 0.5)
        )

        # The original called np.exp without numpy being imported.
        return self._sigmoid(logit)
|
|
|
|
class PolicyEngine:
    """Deterministic OSS policy gates - advisory only.

    ``evaluate`` runs a fixed sequence of gates (confidence, risk level,
    destructive pattern, human review, license) over a proposed action and
    reports which passed. Nothing is executed here.
    """

    def __init__(self):
        """Build the mutable policy configuration from module settings."""
        self.config = {
            "confidence_threshold": settings.default_confidence_threshold,
            "max_autonomous_risk": settings.default_max_risk,
            "risk_thresholds": {
                RiskLevel.LOW: 0.7,
                RiskLevel.MEDIUM: 0.5,
                RiskLevel.HIGH: 0.3,
                RiskLevel.CRITICAL: 0.1
            },
            # Regexes matched against the lower-cased action text.
            "destructive_patterns": [
                r'\bdrop\s+database\b',
                r'\bdelete\s+from\b',
                r'\btruncate\b',
                r'\balter\s+table\b',
                r'\bdrop\s+table\b',
                r'\bshutdown\b',
                r'\bterminate\b',
                r'\brm\s+-rf\b'
            ],
            "require_human": [RiskLevel.CRITICAL, RiskLevel.HIGH],
            "require_rollback": True
        }

    def evaluate(self, action: str, risk: Dict[str, Any], confidence: float) -> Dict[str, Any]:
        """Run every policy gate against a proposed action.

        Args:
            action: Free-text description of the proposed action.
            risk: Risk assessment dict; ``level`` (a ``RiskLevel``), ``score``
                and ``credible_interval`` are read.
            confidence: Caller-reported confidence for the action.

        Returns:
            A dict with ``allowed`` (True only when all gates pass), the
            ``required_level`` string, the list of ``gates`` and the advisory
            disclaimer fields.
        """
        import re  # local import: the module header does not import re

        gates = []
        threshold = self.config["confidence_threshold"]
        max_risk = self.config["max_autonomous_risk"]
        level = risk["level"]

        # Gate 1: confidence must reach the configured threshold.
        confidence_passed = confidence >= threshold
        gates.append({
            "gate": "confidence_threshold",
            "passed": confidence_passed,
            "threshold": threshold,
            "actual": confidence,
            # The comparison glyph was mis-encoded in the source; restored.
            "reason": f"Confidence {confidence:.2f} {'≥' if confidence_passed else '<'} threshold {threshold}",
            "type": "numerical"
        })

        # Gate 2: risk level must not exceed the autonomous maximum. Levels
        # are ordered by their declaration position in RiskLevel.
        risk_levels = list(RiskLevel)
        max_idx = risk_levels.index(RiskLevel(max_risk))
        action_idx = risk_levels.index(level)
        risk_passed = action_idx <= max_idx
        gates.append({
            "gate": "risk_assessment",
            "passed": risk_passed,
            "max_allowed": max_risk,
            "actual": level.value,
            "reason": f"Risk level {level.value} {'≤' if risk_passed else '>'} max autonomous {max_risk}",
            "type": "categorical",
            "metadata": {"risk_score": risk["score"], "credible_interval": risk["credible_interval"]}
        })

        # Gate 3: destructive keywords. Lower-case once, not per pattern.
        action_lower = action.lower()
        is_destructive = any(
            re.search(pattern, action_lower)
            for pattern in self.config["destructive_patterns"]
        )
        gates.append({
            "gate": "destructive_check",
            "passed": not is_destructive,
            "is_destructive": is_destructive,
            "reason": "Non-destructive operation" if not is_destructive else "Destructive operation detected",
            "type": "boolean",
            "metadata": {"requires_rollback": is_destructive}
        })

        # Gate 4: configured risk levels always need a human.
        requires_human = level in self.config["require_human"]
        gates.append({
            "gate": "human_review",
            "passed": not requires_human,
            "requires_human": requires_human,
            "reason": "Human review not required" if not requires_human else f"Human review required for {level.value} risk",
            "type": "boolean"
        })

        # Gate 5: license gate, always passing in the OSS edition.
        gates.append({
            "gate": "license_check",
            "passed": True,
            "edition": "OSS",
            "reason": "OSS edition - advisory only",
            "type": "license"
        })

        all_passed = all(g["passed"] for g in gates)

        if not all_passed:
            required_level = ExecutionLevel.OPERATOR_REVIEW
        elif level == RiskLevel.LOW:
            required_level = ExecutionLevel.AUTONOMOUS_LOW
        elif level == RiskLevel.MEDIUM:
            required_level = ExecutionLevel.AUTONOMOUS_HIGH
        else:
            required_level = ExecutionLevel.SUPERVISED

        return {
            "allowed": all_passed,
            "required_level": required_level.value,
            "gates": gates,
            "advisory_only": True,
            "oss_disclaimer": "OSS edition provides advisory only. Enterprise adds execution."
        }

    def update_config(self, key: str, value: Any):
        """Overwrite an existing config key.

        Returns:
            True when ``key`` existed and was updated, False otherwise (the
            configuration is left untouched for unknown keys).
        """
        if key in self.config:
            self.config[key] = value
            logger.info(f"Policy updated: {key} = {value}")
            return True
        return False
|
|
| |
| |
| |
| class RAGMemory: |
| """Persistent RAG memory with SQLite and sentenceβtransformer embeddings.""" |
| def __init__(self): |
| self.db_path = f"{settings.data_dir}/memory.db" |
| self._init_db() |
| self.embedding_cache = {} |
| self._sentence_model = None |
|
|
| def _get_sentence_model(self): |
| """Lazy load the sentenceβtransformer model.""" |
| if self._sentence_model is None: |
| from sentence_transformers import SentenceTransformer |
| |
| self._sentence_model = SentenceTransformer('all-MiniLM-L6-v2') |
| return self._sentence_model |
|
|
| def _build_incident_text(self, action: str) -> str: |
| """Create a descriptive text from the action.""" |
| |
| return f"Action: {action}" |
|
|
| def _simple_embedding(self, text: str) -> List[float]: |
| """Generate embedding using sentenceβtransformer.""" |
| if text in self.embedding_cache: |
| return self.embedding_cache[text] |
|
|
| model = self._get_sentence_model() |
| |
| embedding = model.encode(text, convert_to_numpy=True).tolist() |
| self.embedding_cache[text] = embedding |
| return embedding |
|
|
| def _ensure_columns(self, conn, columns): |
| """Add columns to incidents table if they do not exist.""" |
| cursor = conn.execute("PRAGMA table_info(incidents)") |
| existing = [row[1] for row in cursor.fetchall()] |
| for col_name, col_type in columns: |
| if col_name not in existing: |
| try: |
| conn.execute(f"ALTER TABLE incidents ADD COLUMN {col_name} {col_type}") |
| logger.info(f"Added column {col_name} to incidents table") |
| except sqlite3.Error as e: |
| logger.error(f"Failed to add column {col_name}: {e}") |
|
|
| def _init_db(self): |
| try: |
| with self._get_db() as conn: |
| conn.execute(''' |
| CREATE TABLE IF NOT EXISTS incidents ( |
| id TEXT PRIMARY KEY, |
| action TEXT, |
| action_hash TEXT, |
| risk_score REAL, |
| risk_level TEXT, |
| confidence REAL, |
| allowed BOOLEAN, |
| gates TEXT, |
| timestamp TEXT, |
| embedding TEXT |
| ) |
| ''') |
| |
| self._ensure_columns(conn, [ |
| ('environment', 'TEXT'), |
| ('user_role', 'TEXT'), |
| ('requires_human', 'BOOLEAN'), |
| ('rollback_feasible', 'BOOLEAN'), |
| ('hour_of_day', 'INTEGER'), |
| ('action_category', 'TEXT') |
| ]) |
| conn.execute(''' |
| CREATE TABLE IF NOT EXISTS signals ( |
| id TEXT PRIMARY KEY, |
| signal_type TEXT, |
| action TEXT, |
| risk_score REAL, |
| metadata TEXT, |
| timestamp TEXT, |
| contacted BOOLEAN DEFAULT 0 |
| ) |
| ''') |
| conn.execute('CREATE INDEX IF NOT EXISTS idx_action_hash ON incidents(action_hash)') |
| conn.execute('CREATE INDEX IF NOT EXISTS idx_signal_type ON signals(signal_type)') |
| conn.execute('CREATE INDEX IF NOT EXISTS idx_signal_contacted ON signals(contacted)') |
| except sqlite3.Error as e: |
| logger.error(f"Failed to initialize memory database: {e}") |
| raise RuntimeError("Could not initialize memory storage") from e |
|
|
| @contextmanager |
| def _get_db(self): |
| conn = None |
| try: |
| conn = sqlite3.connect(self.db_path) |
| conn.row_factory = sqlite3.Row |
| yield conn |
| except sqlite3.Error as e: |
| logger.error(f"Database error in memory: {e}") |
| raise |
| finally: |
| if conn: |
| conn.close() |
|
|
| def store_incident(self, action: str, risk_score: float, risk_level: RiskLevel, |
| confidence: float, allowed: bool, gates: List[Dict], |
| environment: str, user_role: str, requires_human: bool, |
| rollback_feasible: bool, hour_of_day: int, action_category: str): |
| action_hash = hashlib.sha256(action.encode()).hexdigest()[:50] |
| incident_text = self._build_incident_text(action) |
| embedding = json.dumps(self._simple_embedding(incident_text)) |
| try: |
| with self._get_db() as conn: |
| conn.execute(''' |
| INSERT INTO incidents |
| (id, action, action_hash, risk_score, risk_level, confidence, allowed, gates, timestamp, embedding, |
| environment, user_role, requires_human, rollback_feasible, hour_of_day, action_category) |
| VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) |
| ''', ( |
| str(uuid.uuid4()), |
| action[:500], |
| action_hash, |
| risk_score, |
| risk_level.value, |
| confidence, |
| 1 if allowed else 0, |
| json.dumps(gates), |
| datetime.utcnow().isoformat(), |
| embedding, |
| environment, |
| user_role, |
| 1 if requires_human else 0, |
| 1 if rollback_feasible else 0, |
| hour_of_day, |
| action_category |
| )) |
| conn.commit() |
| except sqlite3.Error as e: |
| logger.error(f"Failed to store incident: {e}") |
|
|
| def find_similar(self, action: str, limit: int = 5) -> List[Dict]: |
| |
| query_text = self._build_incident_text(action) |
| query_embedding = self._simple_embedding(query_text) |
| try: |
| with self._get_db() as conn: |
| cursor = conn.execute('SELECT * FROM incidents ORDER BY timestamp DESC LIMIT 100') |
| incidents = [] |
| for row in cursor.fetchall(): |
| stored_embedding = json.loads(row['embedding']) |
| dot = sum(q * s for q, s in zip(query_embedding, stored_embedding)) |
| norm_q = sum(q*q for q in query_embedding) ** 0.5 |
| norm_s = sum(s*s for s in stored_embedding) ** 0.5 |
| similarity = dot / (norm_q * norm_s) if (norm_q > 0 and norm_s > 0) else 0 |
| incidents.append({ |
| 'id': row['id'], |
| 'action': row['action'], |
| 'risk_score': row['risk_score'], |
| 'risk_level': row['risk_level'], |
| 'confidence': row['confidence'], |
| 'allowed': bool(row['allowed']), |
| 'timestamp': row['timestamp'], |
| 'similarity': similarity |
| }) |
| incidents.sort(key=lambda x: x['similarity'], reverse=True) |
| return incidents[:limit] |
| except sqlite3.Error as e: |
| logger.error(f"Failed to find similar incidents: {e}") |
| return [] |
|
|
| def track_enterprise_signal(self, signal_type: LeadSignal, action: str, |
| risk_score: float, metadata: Dict = None): |
| signal = { |
| 'id': str(uuid.uuid4()), |
| 'signal_type': signal_type.value, |
| 'action': action[:200], |
| 'risk_score': risk_score, |
| 'metadata': json.dumps(metadata or {}), |
| 'timestamp': datetime.utcnow().isoformat(), |
| 'contacted': 0 |
| } |
| try: |
| with self._get_db() as conn: |
| conn.execute(''' |
| INSERT INTO signals |
| (id, signal_type, action, risk_score, metadata, timestamp, contacted) |
| VALUES (?, ?, ?, ?, ?, ?, ?) |
| ''', ( |
| signal['id'], |
| signal['signal_type'], |
| signal['action'], |
| signal['risk_score'], |
| signal['metadata'], |
| signal['timestamp'], |
| signal['contacted'] |
| )) |
| conn.commit() |
| except sqlite3.Error as e: |
| logger.error(f"Failed to track signal: {e}") |
| return None |
|
|
| logger.info(f"π Enterprise signal: {signal_type.value} - {action[:50]}...") |
| if signal_type in [LeadSignal.HIGH_RISK_BLOCKED, LeadSignal.NOVEL_ACTION]: |
| self._notify_sales_team(signal) |
| return signal |
|
|
| def _notify_sales_team(self, signal: Dict): |
| if settings.slack_webhook: |
| try: |
| requests.post(settings.slack_webhook, json={ |
| "text": f"π¨ *Enterprise Lead Signal*\n" |
| f"Type: {signal['signal_type']}\n" |
| f"Action: {signal['action']}\n" |
| f"Risk Score: {signal['risk_score']:.2f}\n" |
| f"Time: {signal['timestamp']}\n" |
| f"Contact: {settings.lead_email}" |
| }, timeout=5) |
| except requests.RequestException as e: |
| logger.error(f"Slack notification failed: {e}") |
|
|
| def get_uncontacted_signals(self) -> List[Dict]: |
| try: |
| with self._get_db() as conn: |
| cursor = conn.execute('SELECT * FROM signals WHERE contacted = 0 ORDER BY timestamp DESC') |
| signals = [] |
| for row in cursor.fetchall(): |
| signals.append({ |
| 'id': row['id'], |
| 'signal_type': row['signal_type'], |
| 'action': row['action'], |
| 'risk_score': row['risk_score'], |
| 'metadata': json.loads(row['metadata']), |
| 'timestamp': row['timestamp'] |
| }) |
| return signals |
| except sqlite3.Error as e: |
| logger.error(f"Failed to get uncontacted signals: {e}") |
| return [] |
|
|
| def mark_contacted(self, signal_id: str): |
| try: |
| with self._get_db() as conn: |
| conn.execute('UPDATE signals SET contacted = 1 WHERE id = ?', (signal_id,)) |
| conn.commit() |
| except sqlite3.Error as e: |
| logger.error(f"Failed to mark signal as contacted: {e}") |
|
|
| |
# Extracts the bearer token from the Authorization header of each request.
security = HTTPBearer()


async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Reject the request unless the bearer token equals the configured key.

    Returns:
        The presented token when it matches ``settings.api_key``.

    Raises:
        HTTPException: 403 when the token does not match.
    """
    import hmac  # local import: the module header does not import hmac

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guess were correct. Compare bytes because
    # compare_digest rejects non-ASCII str arguments.
    supplied = credentials.credentials.encode("utf-8")
    expected = settings.api_key.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid API key"
        )
    return credentials.credentials
|
|
| |
class ActionRequest(BaseModel):
    """Request body for evaluating a proposed action."""

    # Free-text action, 1..1000 characters and not whitespace-only.
    proposedAction: str = Field(..., min_length=1, max_length=1000)
    # Caller-reported confidence in the action, between 0 and 1 inclusive.
    confidenceScore: float = Field(..., ge=0.0, le=1.0)
    riskLevel: RiskLevel
    description: Optional[str] = None
    requiresHuman: bool = False
    rollbackFeasible: bool = True
    user_role: str = "devops"
    session_id: Optional[str] = None

    @field_validator('proposedAction')
    @classmethod
    def validate_action(cls, v: str) -> str:
        """Reject actions that are empty once surrounding whitespace is removed."""
        if not v.strip():
            raise ValueError('Action cannot be empty')
        return v
|
|
class ConfigUpdateRequest(BaseModel):
    """Request body for changing policy settings; omitted fields stay as-is."""

    # New confidence threshold, restricted to the range 0.5..1.0.
    confidenceThreshold: Optional[float] = Field(None, ge=0.5, le=1.0)
    # New maximum risk level allowed without human involvement.
    maxAutonomousRisk: Optional[RiskLevel] = None
|
|
class GateResult(BaseModel):
    """Outcome of one policy gate as returned to API clients."""

    gate: str
    reason: str
    passed: bool
    # Optional numeric/categorical details; not every gate sets them.
    threshold: Optional[Any] = None
    actual: Optional[Any] = None
    type: str = "boolean"
    metadata: Optional[Dict] = None
|
|
class EvaluationResponse(BaseModel):
    """Response body of the action-evaluation endpoint."""

    allowed: bool
    requiredLevel: str
    gatesTriggered: List[GateResult]
    shouldEscalate: bool
    escalationReason: Optional[str] = None
    executionLadder: Optional[Dict] = None
    oss_disclaimer: str = "OSS edition provides advisory only. Enterprise adds mechanical gates and execution."
|
|
class LeadSignalResponse(BaseModel):
    """Serialized form of one stored lead signal."""

    id: str
    signal_type: str
    action: str
    risk_score: float
    timestamp: str
    metadata: Dict
|
|
| |
class InfrastructureIntentRequest(BaseModel):
    """Request body describing an infrastructure intent to evaluate.

    ``intent_type`` selects which of the optional fields are read by the
    infrastructure endpoint ("provision", "deploy" or "grant").
    """

    intent_type: str
    resource_type: Optional[str] = None
    region: Optional[str] = None
    size: Optional[str] = None
    environment: str = "PROD"
    requester: str
    config_content: Optional[Dict[str, Any]] = None
    permission: Optional[str] = None
    target: Optional[str] = None
|
|
class InfrastructureEvaluationResponse(BaseModel):
    """Response body of the infrastructure-evaluation endpoint."""

    recommended_action: str
    justification: str
    policy_violations: List[str]
    # Must be supplied by the caller but may be None.
    estimated_cost: Optional[float]
    risk_score: float
    confidence_score: float
    evaluation_details: Dict[str, Any]
|
|
| |
# Coefficients of the trained HMC model, or None when no model is loaded.
hmc_model_data = None


def load_hmc_model():
    """Load ``hmc_model.json`` from the data directory into ``hmc_model_data``.

    Best effort: a missing file is only logged; an unreadable, malformed or
    non-object file is logged as an error and leaves ``hmc_model_data`` None.
    """
    global hmc_model_data
    model_path = f"{settings.data_dir}/hmc_model.json"
    if not os.path.exists(model_path):
        logger.info("No HMC model found; using default risk engine")
        return

    try:
        # Explicit encoding: the platform default would otherwise be used,
        # which can mis-decode non-ASCII coefficient names.
        with open(model_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except Exception as e:
        logger.error(f"Failed to load HMC model: {e}")
        hmc_model_data = None
        return

    # Consumers call .get() on the model, so anything but an object is unusable.
    if not isinstance(loaded, dict):
        logger.error(
            f"Failed to load HMC model: expected a JSON object, got {type(loaded).__name__}"
        )
        hmc_model_data = None
        return

    hmc_model_data = loaded
    logger.info("HMC model loaded successfully")
|
|
| |
# FastAPI application object; all routes below are registered on it.
_contact_info = {
    "name": "ARF Sales",
    "email": settings.lead_email,
}
app = FastAPI(
    title="ARF OSS Real Engine (API Only)",
    version="3.3.9",
    description="Real ARF OSS components for enterprise lead generation β backend API only.",
    contact=_contact_info,
)

# CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# broader than a bearer-token API usually needs; confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
| |
# Module-level singletons shared by all request handlers.
risk_engine = BayesianRiskEngine()
policy_engine = PolicyEngine()
memory = RAGMemory()
load_hmc_model()

# Default infrastructure policy: the region policy combined (via ``&``) with
# the cost-threshold policy.
_region_policy = RegionAllowedPolicy(allowed_regions={"eastus", "westeurope"})
_cost_policy = CostThresholdPolicy(500.0)
_default_policy = _region_policy & _cost_policy

# Pass the pricing file only when it exists in the working directory.
if os.path.exists("pricing.yml"):
    _pricing_file = "pricing.yml"
else:
    _pricing_file = None

infra_simulator = AzureInfrastructureSimulator(
    policy=_default_policy,
    pricing_file=_pricing_file,
)
|
|
| |
|
|
@app.get("/")
async def root():
    """Return a static service banner with a pointer to the API docs."""
    banner = {}
    banner["service"] = "ARF OSS API"
    banner["version"] = "3.3.9"
    banner["status"] = "operational"
    banner["docs"] = "/docs"
    return banner
|
|
@app.get("/health")
async def health_check():
    """Report liveness, the edition/version and the current UTC time.

    ``memory_entries`` is the number of signals not yet marked as contacted.
    """
    pending_signals = memory.get_uncontacted_signals()
    now_iso = datetime.utcnow().isoformat()
    return {
        "status": "healthy",
        "version": "3.3.9",
        "edition": "OSS",
        "memory_entries": len(pending_signals),
        "timestamp": now_iso,
    }
|
|
@app.get("/api/v1/config", dependencies=[Depends(verify_api_key)])
async def get_config():
    """Return the policy engine's current thresholds and edition info."""
    cfg = policy_engine.config
    return {
        "confidenceThreshold": cfg["confidence_threshold"],
        "maxAutonomousRisk": cfg["max_autonomous_risk"],
        "riskScoreThresholds": cfg["risk_thresholds"],
        "version": "3.3.9",
        "edition": "OSS",
    }
|
|
@app.post("/api/v1/config", dependencies=[Depends(verify_api_key)])
async def update_config(config: ConfigUpdateRequest):
    """Apply whichever settings are present and return the resulting config."""
    # Collect the (policy key, new value) pairs that were actually supplied.
    pending = []
    if config.confidenceThreshold:
        pending.append(("confidence_threshold", config.confidenceThreshold))
    if config.maxAutonomousRisk:
        pending.append(("max_autonomous_risk", config.maxAutonomousRisk.value))

    for policy_key, new_value in pending:
        policy_engine.update_config(policy_key, new_value)
    return await get_config()
|
|
def _risk_level_from_score(score: float) -> RiskLevel:
    """Map a numeric risk score to a RiskLevel (cut-offs 0.8 / 0.6 / 0.4)."""
    if score > 0.8:
        return RiskLevel.CRITICAL
    if score > 0.6:
        return RiskLevel.HIGH
    if score > 0.4:
        return RiskLevel.MEDIUM
    return RiskLevel.LOW


def _assess_risk(action: str, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return the risk dict, overriding score/level with the HMC model if loaded."""
    if not hmc_model_data:
        return risk_engine.calculate_posterior(action, context)

    hmc_score = risk_engine.enhanced_risk(action, context, hmc_model_data)
    # The posterior dict supplies the remaining fields (interval, parameters).
    risk = risk_engine.calculate_posterior(action, context)
    risk["score"] = hmc_score
    risk["level"] = _risk_level_from_score(hmc_score)
    return risk


def _track_lead_signals(action: str, confidence: float, risk: Dict[str, Any],
                        policy: Dict[str, Any], similar: List[Dict]) -> None:
    """Record lead signals for blocked high-risk and novel risky actions."""
    if not policy["allowed"] and risk["score"] > 0.7:
        memory.track_enterprise_signal(
            signal_type=LeadSignal.HIGH_RISK_BLOCKED,
            action=action,
            risk_score=risk["score"],
            metadata={
                "confidence": confidence,
                "risk_level": risk["level"].value,
                "failed_gates": [g["gate"] for g in policy["gates"] if not g["passed"]]
            }
        )
    if len(similar) < 2 and risk["score"] > 0.6:
        memory.track_enterprise_signal(
            signal_type=LeadSignal.NOVEL_ACTION,
            action=action,
            risk_score=risk["score"],
            metadata={"similar_count": len(similar)}
        )


def _build_execution_ladder(gates: List[GateResult], current_level: str) -> Dict[str, Any]:
    """Describe which execution tiers the passed gates would satisfy."""
    return {
        "levels": [
            {"name": "AUTONOMOUS_LOW", "required": gates[0].passed and gates[1].passed},
            {"name": "AUTONOMOUS_HIGH", "required": all(g.passed for g in gates[:3])},
            {"name": "SUPERVISED", "required": all(g.passed for g in gates[:4])},
            {"name": "OPERATOR_REVIEW", "required": True}
        ],
        "current": current_level
    }


@app.post("/api/v1/evaluate", dependencies=[Depends(verify_api_key)], response_model=EvaluationResponse)
async def evaluate_action(request: ActionRequest):
    """Evaluate a proposed action: score risk, run policy gates, store it.

    The environment is always treated as "production". The incident is
    stored after any lead signals have been recorded.

    Raises:
        HTTPException: 500 if any step fails unexpectedly.
    """
    try:
        action = request.proposedAction
        context = {
            "environment": "production",
            "user_role": request.user_role,
            "backup_available": request.rollbackFeasible,
            "requires_human": request.requiresHuman,
            "confidence": request.confidenceScore
        }

        risk = _assess_risk(action, context)
        policy = policy_engine.evaluate(
            action=action,
            risk=risk,
            confidence=request.confidenceScore
        )
        # Looked up before storing, so the current request is not its own match.
        similar = memory.find_similar(action, limit=3)

        hour_of_day = datetime.now().hour
        action_category = risk_engine.classify_action(action)

        _track_lead_signals(action, request.confidenceScore, risk, policy, similar)

        memory.store_incident(
            action=action,
            risk_score=risk["score"],
            risk_level=risk["level"],
            confidence=request.confidenceScore,
            allowed=policy["allowed"],
            gates=policy["gates"],
            environment=context["environment"],
            user_role=request.user_role,
            requires_human=request.requiresHuman,
            rollback_feasible=request.rollbackFeasible,
            hour_of_day=hour_of_day,
            action_category=action_category
        )

        gates = [
            GateResult(
                gate=g["gate"],
                reason=g["reason"],
                passed=g["passed"],
                threshold=g.get("threshold"),
                actual=g.get("actual"),
                type=g.get("type", "boolean"),
                metadata=g.get("metadata")
            )
            for g in policy["gates"]
        ]

        return EvaluationResponse(
            allowed=policy["allowed"],
            requiredLevel=policy["required_level"],
            gatesTriggered=gates,
            shouldEscalate=not policy["allowed"],
            escalationReason=None if policy["allowed"] else "Failed mechanical gates",
            executionLadder=_build_execution_ladder(gates, policy["required_level"])
        )
    except Exception as e:
        logger.error(f"Evaluation failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during evaluation")
|
|
@app.get("/api/v1/enterprise/signals", dependencies=[Depends(verify_api_key)])
async def get_enterprise_signals(contacted: bool = False):
    """List lead signals.

    With ``contacted`` true, only signals not yet contacted are returned;
    otherwise every signal from the last 30 days is returned, including its
    ``contacted`` flag.

    NOTE(review): the flag reads as inverted (``contacted=True`` yields the
    uncontacted set). Confirm the intended semantics before changing it;
    clients may depend on the current default.

    Raises:
        HTTPException: 500 if the signals cannot be read.
    """
    try:
        if not contacted:
            with memory._get_db() as conn:
                rows = conn.execute('''
                    SELECT * FROM signals
                    WHERE datetime(timestamp) > datetime('now', '-30 days')
                    ORDER BY timestamp DESC
                ''').fetchall()
                signals = [
                    {
                        'id': record['id'],
                        'signal_type': record['signal_type'],
                        'action': record['action'],
                        'risk_score': record['risk_score'],
                        'metadata': json.loads(record['metadata']),
                        'timestamp': record['timestamp'],
                        'contacted': bool(record['contacted']),
                    }
                    for record in rows
                ]
        else:
            signals = memory.get_uncontacted_signals()
        return {"signals": signals, "count": len(signals)}
    except Exception as e:
        logger.error(f"Failed to retrieve signals: {e}")
        raise HTTPException(status_code=500, detail="Could not retrieve signals")
|
|
@app.post("/api/v1/enterprise/signals/{signal_id}/contact", dependencies=[Depends(verify_api_key)])
async def mark_signal_contacted(signal_id: str):
    """Flag a signal as contacted.

    The reply is always a success payload: ``memory.mark_contacted`` returns
    nothing, so this handler cannot tell whether a row was updated.
    """
    memory.mark_contacted(signal_id)
    reply = {"status": "success", "message": "Signal marked as contacted"}
    return reply
|
|
@app.get("/api/v1/memory/similar", dependencies=[Depends(verify_api_key)])
async def get_similar_actions(action: str, limit: int = 5):
    """Return stored incidents most similar to ``action`` and their count."""
    matches = memory.find_similar(action, limit=limit)
    return {"similar": matches, "count": len(matches)}
|
|
@app.post("/api/v1/feedback", dependencies=[Depends(verify_api_key)])
async def record_outcome(action: str, success: bool):
    """Feed an observed outcome for ``action`` back into the risk engine."""
    risk_engine.record_outcome(action, success)
    acknowledgement = {"status": "success", "message": "Outcome recorded"}
    return acknowledgement
|
|
| |
@app.post("/api/v1/infrastructure/evaluate", dependencies=[Depends(verify_api_key)], response_model=InfrastructureEvaluationResponse)
async def evaluate_infrastructure_intent(request: InfrastructureIntentRequest):
    """Build an intent from the request and evaluate it with the simulator.

    Supported ``intent_type`` values are "provision", "deploy" and "grant".

    Raises:
        HTTPException: 400 for an unknown intent type or a provision request
            missing resource_type/region/size; 500 (with the error text as
            detail) for any other failure.
    """
    try:
        kind = request.intent_type
        target_env = request.environment.lower()

        if kind == "provision":
            required = [request.resource_type, request.region, request.size]
            if not all(required):
                raise HTTPException(400, "Missing fields for provision intent")
            intent = ProvisionResourceIntent(
                resource_type=request.resource_type.lower(),
                region=request.region,
                size=request.size,
                requester=request.requester,
                environment=target_env,
            )
        elif kind == "deploy":
            intent = DeployConfigurationIntent(
                service_name=request.resource_type or "unknown",
                change_scope="canary",
                deployment_target=target_env,
                configuration=request.config_content or {},
                requester=request.requester,
            )
        elif kind == "grant":
            intent = GrantAccessIntent(
                principal=request.requester,
                permission_level=request.permission or "read",
                resource_scope=request.target or "/",
                justification="Requested via API",
            )
        else:
            raise HTTPException(400, f"Unknown intent type: {request.intent_type}")

        outcome = infra_simulator.evaluate(intent)

        return InfrastructureEvaluationResponse(
            recommended_action=outcome.recommended_action.value,
            justification=outcome.justification,
            policy_violations=outcome.policy_violations,
            estimated_cost=outcome.cost_projection,
            risk_score=outcome.risk_score or 0.0,
            confidence_score=outcome.confidence_score,
            evaluation_details=outcome.evaluation_details,
        )
    except HTTPException:
        # Deliberate 4xx responses pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Infrastructure evaluation failed: {e}", exc_info=True)
        raise HTTPException(500, detail=str(e))
|
|
| |
@app.post("/api/v1/admin/train_hmc", dependencies=[Depends(verify_api_key)])
async def train_hmc():
    """Train the HMC model on the incident database and start using it.

    On success the module-level ``hmc_model_data`` is replaced by the new
    model and its ``coefficients`` entry is returned.

    Raises:
        HTTPException: 500 (with the error text as detail) if training fails.
    """
    global hmc_model_data
    try:
        incidents_db = f"{settings.data_dir}/memory.db"
        trained = train_hmc_model(incidents_db, output_dir=settings.data_dir)
        hmc_model_data = trained
        response = {
            "status": "success",
            "message": "HMC model trained and loaded",
            "coefficients": trained.get("coefficients"),
        }
        return response
    except Exception as e:
        logger.error(f"HMC training failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
|
| |
if __name__ == "__main__":
    # Script entry point: log a start-up banner, then serve the app.
    import uvicorn

    port = int(os.environ.get('PORT', 7860))
    separator = "="*60

    # Only the first 8 characters of the API key are written to the log.
    banner_lines = [
        separator,
        "π ARF OSS v3.3.9 (API Only) Starting",
        f"π Data directory: {settings.data_dir}",
        f"π§ Lead email: {settings.lead_email}",
        f"π API Key: {settings.api_key[:8]}... (set in HF secrets)",
        f"π Serving API at: http://0.0.0.0:{port}",
        separator,
    ]
    for line in banner_lines:
        logger.info(line)

    uvicorn.run(
        "hf_demo:app",
        host="0.0.0.0",
        port=port,
        log_level="info",
        reload=False,
    )