File size: 5,951 Bytes
6252f54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
EA Digital Twin Simulation Environment for DRL training.

State: 10 capability scores + 3 budget/timeline/risk scalars + 7 domain flags = 20 dims
Action: priority ordering of top-10 capabilities (multinomial sampling)
Reward: business value - dependency violations - budget overrun - risk penalty
"""

import numpy as np
import random
from typing import NamedTuple


class EAScenario(NamedTuple):
    """Immutable description of one randomised EA planning scenario.

    The three ``cap_*`` arrays and both axes of ``dependency_matrix`` are
    indexed by capability (10 capabilities per scenario).
    """

    cap_business_values: np.ndarray    # shape (10,) — business value per capability, 0..1
    cap_effort_scores: np.ndarray      # shape (10,) — implementation effort per capability, 0..1
    cap_risk_scores: np.ndarray        # shape (10,) — delivery risk per capability, 0..1
    dependency_matrix: np.ndarray      # shape (10, 10) — dep_matrix[i,j]=1 means i must precede j
    budget_capacity: float             # 0..1 normalised
    timeline_score: float              # months/36
    risk_tolerance: float              # 0..1


# 10 EA capability archetypes (represent real patterns in the graph).
# The list position of a name is the capability index used by the BASE_*
# arrays and by the index pairs in BASE_DEPENDENCIES below.
ARCHETYPE_NAMES = [
    "Data Platform",
    "API Management",
    "Customer Portal",
    "Advanced Analytics",
    "Security & Compliance",
    "Process Automation",
    "Cloud Migration",
    "AI/ML Platform",
    "ERP Integration",
    "DevOps Pipeline",
]

# Baseline business value, effort and risk per archetype, one entry per
# capability index. EAEnvironment.reset() adds uniform noise to each array and
# clips the result, so these are per-episode starting points, not fixed values.
BASE_BUSINESS_VALUES = np.array([0.90, 0.80, 0.70, 0.85, 0.95, 0.75, 0.70, 0.80, 0.65, 0.70])
BASE_EFFORT_SCORES = np.array([0.80, 0.50, 0.60, 0.70, 0.60, 0.65, 0.90, 0.85, 0.75, 0.50])
BASE_RISK_SCORES = np.array([0.40, 0.30, 0.30, 0.35, 0.20, 0.40, 0.60, 0.50, 0.55, 0.25])

# Dependency rules as (prerequisite, dependent) capability-index pairs.
# EAEnvironment.reset() keeps each pair with 80% probability when it builds an
# episode's dependency matrix.
BASE_DEPENDENCIES = [
    (0, 1),  # Data Platform → API Management
    (1, 2),  # API Management → Customer Portal
    (0, 3),  # Data Platform → Analytics
    (1, 5),  # API Management → Process Automation
    (6, 7),  # Cloud Migration → AI/ML Platform
    (0, 7),  # Data Platform → AI/ML Platform
]


class EAEnvironment:
    """Simulated Enterprise Architecture environment for REINFORCE training."""

    STATE_DIM = 20
    ACTION_DIM = 10

    def __init__(self, noise_scale: float = 0.1, seed: int | None = None):
        self._rng = np.random.default_rng(seed)
        self._noise = noise_scale
        self.scenario: EAScenario | None = None
        self.current_step = 0

    def reset(self) -> np.ndarray:
        """Generate a new randomised EA scenario and return initial state vector."""
        noise = self._rng.uniform(-self._noise, self._noise, size=10)
        bv = np.clip(BASE_BUSINESS_VALUES + noise, 0.1, 1.0)
        ef = np.clip(BASE_EFFORT_SCORES + self._rng.uniform(-self._noise, self._noise, 10), 0.1, 1.0)
        ri = np.clip(BASE_RISK_SCORES + self._rng.uniform(-self._noise / 2, self._noise / 2, 10), 0.05, 0.9)

        # Randomise dep matrix from base
        dep_matrix = np.zeros((10, 10), dtype=float)
        for (i, j) in BASE_DEPENDENCIES:
            if self._rng.random() > 0.2:  # 80% chance to include each dependency
                dep_matrix[i, j] = 1.0

        budget_capacity = float(self._rng.choice([0.4, 0.6, 0.8, 1.0]))
        timeline_score = float(self._rng.choice([6, 12, 18, 24, 36])) / 36.0
        risk_tolerance = float(self._rng.choice([0.33, 0.67, 1.0]))

        self.scenario = EAScenario(bv, ef, ri, dep_matrix, budget_capacity, timeline_score, risk_tolerance)
        self.current_step = 0
        return self.get_state_vector()

    def get_state_vector(self) -> np.ndarray:
        """Build 20-dim state vector from current scenario."""
        s = self.scenario
        # 7 domain flags — simulate which of 7 EA domain categories are in this scenario
        domain_flags = (s.cap_business_values[:7] > 0.6).astype(float)
        state = np.concatenate([
            s.cap_business_values,        # 10 dims
            [s.budget_capacity],          # 1
            [s.timeline_score],           # 1
            [s.risk_tolerance],           # 1
            domain_flags,                 # 7
        ]).astype(np.float32)
        return state

    def step(self, action_indices: np.ndarray) -> tuple[np.ndarray, float, bool]:
        """
        action_indices: ordered priority list of capability indices (len=10)
        Returns (next_state, reward, done)
        """
        s = self.scenario

        # Base reward: value-weighted rank score
        base_reward = 0.0
        for rank, idx in enumerate(action_indices):
            rank_fraction = rank / len(action_indices)
            base_reward += s.cap_business_values[idx] * (1.0 - rank_fraction)
        base_reward /= len(action_indices)  # normalise to 0..1

        # Dependency penalty
        dep_violations = 0
        for i, dep_i in enumerate(action_indices):
            for j, dep_j in enumerate(action_indices):
                if s.dependency_matrix[dep_j, dep_i] == 1.0 and j < i:
                    dep_violations += 1
        dep_penalty = dep_violations * 0.15

        # Budget penalty — cumulative effort of top-N capped by budget
        cum_effort = 0.0
        budget_penalty = 0.0
        for idx in action_indices:
            cum_effort += s.cap_effort_scores[idx] / 10.0
            if cum_effort > s.budget_capacity:
                budget_penalty += 0.05

        # Risk penalty — high-risk caps in top-3 positions
        risk_penalty = 0.0
        for idx in action_indices[:3]:
            if s.cap_risk_scores[idx] > s.risk_tolerance:
                risk_penalty += s.cap_risk_scores[idx] * 0.2

        reward = float(base_reward - dep_penalty - budget_penalty - risk_penalty)
        reward = max(-1.0, min(2.0, reward))

        self.current_step += 1
        done = True  # single-step environment (one full ordering per episode)
        next_state = self.get_state_vector()
        return next_state, reward, done

    def sample_action(self) -> np.ndarray:
        """Random action for baseline / exploration."""
        return self._rng.permutation(self.ACTION_DIM).astype(np.int64)