Spaces:
Running
Running
"""
EA Digital Twin Simulation Environment for DRL training.

State:  10 capability scores + 3 budget/timeline/risk scalars + 7 domain flags = 20 dims
Action: priority ordering of top-10 capabilities (multinomial sampling)
Reward: business value - dependency violations - budget overrun - risk penalty
"""
| import numpy as np | |
| import random | |
| from typing import NamedTuple | |
class EAScenario(NamedTuple):
    """Immutable description of one randomised EA planning scenario.

    The three ``cap_*`` vectors and both axes of ``dependency_matrix`` are
    indexed by capability.
    """

    # Business value per capability — shape (10,), range 0..1.
    cap_business_values: np.ndarray
    # Implementation effort per capability — shape (10,), range 0..1.
    cap_effort_scores: np.ndarray
    # Delivery risk per capability — shape (10,), range 0..1.
    cap_risk_scores: np.ndarray
    # Shape (10, 10); dependency_matrix[i, j] == 1 means capability i must
    # precede capability j.
    dependency_matrix: np.ndarray
    # Available budget, normalised to 0..1.
    budget_capacity: float
    # Planning horizon expressed as months / 36.
    timeline_score: float
    # Risk tolerance, 0..1.
    risk_tolerance: float
# The 10 EA capability archetypes (they represent real patterns in the graph).
# Each row keeps one archetype's baseline figures side by side so the parallel
# constants derived below can never drift out of alignment.
_ARCHETYPES = (
    # name,                    value, effort, risk
    ("Data Platform",          0.90,  0.80,   0.40),
    ("API Management",         0.80,  0.50,   0.30),
    ("Customer Portal",        0.70,  0.60,   0.30),
    ("Advanced Analytics",     0.85,  0.70,   0.35),
    ("Security & Compliance",  0.95,  0.60,   0.20),
    ("Process Automation",     0.75,  0.65,   0.40),
    ("Cloud Migration",        0.70,  0.90,   0.60),
    ("AI/ML Platform",         0.80,  0.85,   0.50),
    ("ERP Integration",        0.65,  0.75,   0.55),
    ("DevOps Pipeline",        0.70,  0.50,   0.25),
)

ARCHETYPE_NAMES = [name for name, _value, _effort, _risk in _ARCHETYPES]

# Baseline per-archetype scores; these are perturbed for every episode.
BASE_BUSINESS_VALUES = np.array([value for _name, value, _effort, _risk in _ARCHETYPES])
BASE_EFFORT_SCORES = np.array([effort for _name, _value, effort, _risk in _ARCHETYPES])
BASE_RISK_SCORES = np.array([risk for _name, _value, _effort, risk in _ARCHETYPES])

# Dependency rules as (prerequisite index, dependent index) pairs.
BASE_DEPENDENCIES = [
    (0, 1),  # Data Platform -> API Management
    (1, 2),  # API Management -> Customer Portal
    (0, 3),  # Data Platform -> Advanced Analytics
    (1, 5),  # API Management -> Process Automation
    (6, 7),  # Cloud Migration -> AI/ML Platform
    (0, 7),  # Data Platform -> AI/ML Platform
]
| class EAEnvironment: | |
| """Simulated Enterprise Architecture environment for REINFORCE training.""" | |
| STATE_DIM = 20 | |
| ACTION_DIM = 10 | |
| def __init__(self, noise_scale: float = 0.1, seed: int | None = None): | |
| self._rng = np.random.default_rng(seed) | |
| self._noise = noise_scale | |
| self.scenario: EAScenario | None = None | |
| self.current_step = 0 | |
| def reset(self) -> np.ndarray: | |
| """Generate a new randomised EA scenario and return initial state vector.""" | |
| noise = self._rng.uniform(-self._noise, self._noise, size=10) | |
| bv = np.clip(BASE_BUSINESS_VALUES + noise, 0.1, 1.0) | |
| ef = np.clip(BASE_EFFORT_SCORES + self._rng.uniform(-self._noise, self._noise, 10), 0.1, 1.0) | |
| ri = np.clip(BASE_RISK_SCORES + self._rng.uniform(-self._noise / 2, self._noise / 2, 10), 0.05, 0.9) | |
| # Randomise dep matrix from base | |
| dep_matrix = np.zeros((10, 10), dtype=float) | |
| for (i, j) in BASE_DEPENDENCIES: | |
| if self._rng.random() > 0.2: # 80% chance to include each dependency | |
| dep_matrix[i, j] = 1.0 | |
| budget_capacity = float(self._rng.choice([0.4, 0.6, 0.8, 1.0])) | |
| timeline_score = float(self._rng.choice([6, 12, 18, 24, 36])) / 36.0 | |
| risk_tolerance = float(self._rng.choice([0.33, 0.67, 1.0])) | |
| self.scenario = EAScenario(bv, ef, ri, dep_matrix, budget_capacity, timeline_score, risk_tolerance) | |
| self.current_step = 0 | |
| return self.get_state_vector() | |
| def get_state_vector(self) -> np.ndarray: | |
| """Build 20-dim state vector from current scenario.""" | |
| s = self.scenario | |
| # 7 domain flags — simulate which of 7 EA domain categories are in this scenario | |
| domain_flags = (s.cap_business_values[:7] > 0.6).astype(float) | |
| state = np.concatenate([ | |
| s.cap_business_values, # 10 dims | |
| [s.budget_capacity], # 1 | |
| [s.timeline_score], # 1 | |
| [s.risk_tolerance], # 1 | |
| domain_flags, # 7 | |
| ]).astype(np.float32) | |
| return state | |
| def step(self, action_indices: np.ndarray) -> tuple[np.ndarray, float, bool]: | |
| """ | |
| action_indices: ordered priority list of capability indices (len=10) | |
| Returns (next_state, reward, done) | |
| """ | |
| s = self.scenario | |
| # Base reward: value-weighted rank score | |
| base_reward = 0.0 | |
| for rank, idx in enumerate(action_indices): | |
| rank_fraction = rank / len(action_indices) | |
| base_reward += s.cap_business_values[idx] * (1.0 - rank_fraction) | |
| base_reward /= len(action_indices) # normalise to 0..1 | |
| # Dependency penalty | |
| dep_violations = 0 | |
| for i, dep_i in enumerate(action_indices): | |
| for j, dep_j in enumerate(action_indices): | |
| if s.dependency_matrix[dep_j, dep_i] == 1.0 and j < i: | |
| dep_violations += 1 | |
| dep_penalty = dep_violations * 0.15 | |
| # Budget penalty — cumulative effort of top-N capped by budget | |
| cum_effort = 0.0 | |
| budget_penalty = 0.0 | |
| for idx in action_indices: | |
| cum_effort += s.cap_effort_scores[idx] / 10.0 | |
| if cum_effort > s.budget_capacity: | |
| budget_penalty += 0.05 | |
| # Risk penalty — high-risk caps in top-3 positions | |
| risk_penalty = 0.0 | |
| for idx in action_indices[:3]: | |
| if s.cap_risk_scores[idx] > s.risk_tolerance: | |
| risk_penalty += s.cap_risk_scores[idx] * 0.2 | |
| reward = float(base_reward - dep_penalty - budget_penalty - risk_penalty) | |
| reward = max(-1.0, min(2.0, reward)) | |
| self.current_step += 1 | |
| done = True # single-step environment (one full ordering per episode) | |
| next_state = self.get_state_vector() | |
| return next_state, reward, done | |
| def sample_action(self) -> np.ndarray: | |
| """Random action for baseline / exploration.""" | |
| return self._rng.permutation(self.ACTION_DIM).astype(np.int64) | |