Spaces:
Sleeping
Sleeping
File size: 5,951 Bytes
"""
EA Digital Twin Simulation Environment for DRL training.
State: 10 capability scores + 3 budget/timeline/risk scalars + 7 domain flags = 20 dims
Action: priority ordering of top-10 capabilities (multinomial sampling)
Reward: business value - dependency violations - budget overrun - risk penalty
"""
import numpy as np
import random
from typing import NamedTuple
class EAScenario(NamedTuple):
    """Immutable description of one randomised EA planning scenario.

    Per-capability arrays are indexed by capability id.

    Fields:
        cap_business_values: shape (10,), values in 0..1.
        cap_effort_scores: shape (10,), values in 0..1.
        cap_risk_scores: shape (10,), values in 0..1.
        dependency_matrix: shape (10, 10); ``dependency_matrix[i, j] == 1``
            means capability ``i`` must precede capability ``j``.
        budget_capacity: normalised budget, 0..1.
        timeline_score: timeline in months divided by 36.
        risk_tolerance: 0..1.
    """

    cap_business_values: np.ndarray
    cap_effort_scores: np.ndarray
    cap_risk_scores: np.ndarray
    dependency_matrix: np.ndarray
    budget_capacity: float
    timeline_score: float
    risk_tolerance: float
# The ten EA capability archetypes (representing real patterns in the graph).
# A capability's position in this list is its index everywhere else in the module.
ARCHETYPE_NAMES = [
    "Data Platform",          # 0
    "API Management",         # 1
    "Customer Portal",        # 2
    "Advanced Analytics",     # 3
    "Security & Compliance",  # 4
    "Process Automation",     # 5
    "Cloud Migration",        # 6
    "AI/ML Platform",         # 7
    "ERP Integration",        # 8
    "DevOps Pipeline",        # 9
]
# Per-archetype baselines (one entry per capability index); these are the
# unperturbed values that get randomised per episode.
# index:                            0     1     2     3     4     5     6     7     8     9
BASE_BUSINESS_VALUES = np.array((0.90, 0.80, 0.70, 0.85, 0.95, 0.75, 0.70, 0.80, 0.65, 0.70))
BASE_EFFORT_SCORES   = np.array((0.80, 0.50, 0.60, 0.70, 0.60, 0.65, 0.90, 0.85, 0.75, 0.50))
BASE_RISK_SCORES     = np.array((0.40, 0.30, 0.30, 0.35, 0.20, 0.40, 0.60, 0.50, 0.55, 0.25))
# Dependency rules as (prerequisite_index, dependent_index) pairs.
# Entry order is significant: it is the order in which the pairs are iterated.
BASE_DEPENDENCIES = [
    # prerequisite        -> dependent
    (0, 1),  # Data Platform   -> API Management
    (1, 2),  # API Management  -> Customer Portal
    (0, 3),  # Data Platform   -> Analytics
    (1, 5),  # API Management  -> Process Automation
    (6, 7),  # Cloud Migration -> AI/ML Platform
    (0, 7),  # Data Platform   -> AI/ML Platform
]
class EAEnvironment:
"""Simulated Enterprise Architecture environment for REINFORCE training."""
STATE_DIM = 20
ACTION_DIM = 10
def __init__(self, noise_scale: float = 0.1, seed: int | None = None):
self._rng = np.random.default_rng(seed)
self._noise = noise_scale
self.scenario: EAScenario | None = None
self.current_step = 0
def reset(self) -> np.ndarray:
"""Generate a new randomised EA scenario and return initial state vector."""
noise = self._rng.uniform(-self._noise, self._noise, size=10)
bv = np.clip(BASE_BUSINESS_VALUES + noise, 0.1, 1.0)
ef = np.clip(BASE_EFFORT_SCORES + self._rng.uniform(-self._noise, self._noise, 10), 0.1, 1.0)
ri = np.clip(BASE_RISK_SCORES + self._rng.uniform(-self._noise / 2, self._noise / 2, 10), 0.05, 0.9)
# Randomise dep matrix from base
dep_matrix = np.zeros((10, 10), dtype=float)
for (i, j) in BASE_DEPENDENCIES:
if self._rng.random() > 0.2: # 80% chance to include each dependency
dep_matrix[i, j] = 1.0
budget_capacity = float(self._rng.choice([0.4, 0.6, 0.8, 1.0]))
timeline_score = float(self._rng.choice([6, 12, 18, 24, 36])) / 36.0
risk_tolerance = float(self._rng.choice([0.33, 0.67, 1.0]))
self.scenario = EAScenario(bv, ef, ri, dep_matrix, budget_capacity, timeline_score, risk_tolerance)
self.current_step = 0
return self.get_state_vector()
def get_state_vector(self) -> np.ndarray:
"""Build 20-dim state vector from current scenario."""
s = self.scenario
# 7 domain flags — simulate which of 7 EA domain categories are in this scenario
domain_flags = (s.cap_business_values[:7] > 0.6).astype(float)
state = np.concatenate([
s.cap_business_values, # 10 dims
[s.budget_capacity], # 1
[s.timeline_score], # 1
[s.risk_tolerance], # 1
domain_flags, # 7
]).astype(np.float32)
return state
def step(self, action_indices: np.ndarray) -> tuple[np.ndarray, float, bool]:
"""
action_indices: ordered priority list of capability indices (len=10)
Returns (next_state, reward, done)
"""
s = self.scenario
# Base reward: value-weighted rank score
base_reward = 0.0
for rank, idx in enumerate(action_indices):
rank_fraction = rank / len(action_indices)
base_reward += s.cap_business_values[idx] * (1.0 - rank_fraction)
base_reward /= len(action_indices) # normalise to 0..1
# Dependency penalty
dep_violations = 0
for i, dep_i in enumerate(action_indices):
for j, dep_j in enumerate(action_indices):
if s.dependency_matrix[dep_j, dep_i] == 1.0 and j < i:
dep_violations += 1
dep_penalty = dep_violations * 0.15
# Budget penalty — cumulative effort of top-N capped by budget
cum_effort = 0.0
budget_penalty = 0.0
for idx in action_indices:
cum_effort += s.cap_effort_scores[idx] / 10.0
if cum_effort > s.budget_capacity:
budget_penalty += 0.05
# Risk penalty — high-risk caps in top-3 positions
risk_penalty = 0.0
for idx in action_indices[:3]:
if s.cap_risk_scores[idx] > s.risk_tolerance:
risk_penalty += s.cap_risk_scores[idx] * 0.2
reward = float(base_reward - dep_penalty - budget_penalty - risk_penalty)
reward = max(-1.0, min(2.0, reward))
self.current_step += 1
done = True # single-step environment (one full ordering per episode)
next_state = self.get_state_vector()
return next_state, reward, done
def sample_action(self) -> np.ndarray:
"""Random action for baseline / exploration."""
return self._rng.permutation(self.ACTION_DIM).astype(np.int64)
|