Spaces:

TheQuantEd
/

EA_strat_optimizer

Running

App Files Files Community

EA_strat_optimizer / backend /drl /environment.py

TheQuantEd

deploy: AMD EA Strategy Optimizer — Neo4j + FastAPI + Streamlit

6252f54 17 days ago

raw

history blame contribute delete

5.95 kB

	"""
	EA Digital Twin Simulation Environment for DRL training.

	State: 10 capability scores + 3 budget/timeline/risk scalars + 7 domain flags = 20 dims
	Action: priority ordering of top-10 capabilities (multinomial sampling)
	Reward: business value - dependency violations - budget overrun - risk penalty
	"""

	import numpy as np
	import random
	from typing import NamedTuple


	class EAScenario(NamedTuple):
	    """Immutable description of one randomised EA planning scenario.

	    The three ``cap_*`` arrays are indexed by capability (10 entries each).
	    ``dependency_matrix[i, j] == 1`` means capability ``i`` must be
	    scheduled before capability ``j``.
	    """

	    # Per-capability scores, each of shape (10,).
	    cap_business_values: np.ndarray  # business value, 0..1
	    cap_effort_scores: np.ndarray  # implementation effort, 0..1
	    cap_risk_scores: np.ndarray  # delivery risk, 0..1
	    # Precedence constraints, shape (10, 10); row = prerequisite, column = dependent.
	    dependency_matrix: np.ndarray
	    # Scenario-level scalars.
	    budget_capacity: float  # normalised to 0..1
	    timeline_score: float  # months / 36
	    risk_tolerance: float  # 0..1


	# The 10 EA capability archetypes (representing real patterns in the graph),
	# one row each: (name, base business value, base effort, base risk).
	# The numeric columns are baselines that get perturbed for every episode.
	_ARCHETYPE_TABLE = (
	    ("Data Platform", 0.90, 0.80, 0.40),
	    ("API Management", 0.80, 0.50, 0.30),
	    ("Customer Portal", 0.70, 0.60, 0.30),
	    ("Advanced Analytics", 0.85, 0.70, 0.35),
	    ("Security & Compliance", 0.95, 0.60, 0.20),
	    ("Process Automation", 0.75, 0.65, 0.40),
	    ("Cloud Migration", 0.70, 0.90, 0.60),
	    ("AI/ML Platform", 0.80, 0.85, 0.50),
	    ("ERP Integration", 0.65, 0.75, 0.55),
	    ("DevOps Pipeline", 0.70, 0.50, 0.25),
	)

	# Split the table column-wise into the module's public constants; the row
	# order above defines the capability index used everywhere else.
	_names, _values, _efforts, _risks = zip(*_ARCHETYPE_TABLE)

	ARCHETYPE_NAMES = list(_names)
	BASE_BUSINESS_VALUES = np.array(_values)
	BASE_EFFORT_SCORES = np.array(_efforts)
	BASE_RISK_SCORES = np.array(_risks)

	# Precedence rules as (prerequisite_index, dependent_index) pairs, using the
	# archetype ordering. Keep the sequence stable: the environment draws one
	# random number per entry, in this order, when building a scenario.
	BASE_DEPENDENCIES: list[tuple[int, int]] = [
	    (0, 1),  # API Management needs the Data Platform
	    (1, 2),  # Customer Portal needs API Management
	    (0, 3),  # Advanced Analytics needs the Data Platform
	    (1, 5),  # Process Automation needs API Management
	    (6, 7),  # AI/ML Platform needs Cloud Migration
	    (0, 7),  # AI/ML Platform needs the Data Platform
	]


	class EAEnvironment:
	"""Simulated Enterprise Architecture environment for REINFORCE training."""

	STATE_DIM = 20
	ACTION_DIM = 10

	def __init__(self, noise_scale: float = 0.1, seed: int \| None = None):
	self._rng = np.random.default_rng(seed)
	self._noise = noise_scale
	self.scenario: EAScenario \| None = None
	self.current_step = 0

	def reset(self) -> np.ndarray:
	"""Generate a new randomised EA scenario and return initial state vector."""
	noise = self._rng.uniform(-self._noise, self._noise, size=10)
	bv = np.clip(BASE_BUSINESS_VALUES + noise, 0.1, 1.0)
	ef = np.clip(BASE_EFFORT_SCORES + self._rng.uniform(-self._noise, self._noise, 10), 0.1, 1.0)
	ri = np.clip(BASE_RISK_SCORES + self._rng.uniform(-self._noise / 2, self._noise / 2, 10), 0.05, 0.9)

	# Randomise dep matrix from base
	dep_matrix = np.zeros((10, 10), dtype=float)
	for (i, j) in BASE_DEPENDENCIES:
	if self._rng.random() > 0.2: # 80% chance to include each dependency
	dep_matrix[i, j] = 1.0

	budget_capacity = float(self._rng.choice([0.4, 0.6, 0.8, 1.0]))
	timeline_score = float(self._rng.choice([6, 12, 18, 24, 36])) / 36.0
	risk_tolerance = float(self._rng.choice([0.33, 0.67, 1.0]))

	self.scenario = EAScenario(bv, ef, ri, dep_matrix, budget_capacity, timeline_score, risk_tolerance)
	self.current_step = 0
	return self.get_state_vector()

	def get_state_vector(self) -> np.ndarray:
	"""Build 20-dim state vector from current scenario."""
	s = self.scenario
	# 7 domain flags — simulate which of 7 EA domain categories are in this scenario
	domain_flags = (s.cap_business_values[:7] > 0.6).astype(float)
	state = np.concatenate([
	s.cap_business_values, # 10 dims
	[s.budget_capacity], # 1
	[s.timeline_score], # 1
	[s.risk_tolerance], # 1
	domain_flags, # 7
	]).astype(np.float32)
	return state

	def step(self, action_indices: np.ndarray) -> tuple[np.ndarray, float, bool]:
	"""
	action_indices: ordered priority list of capability indices (len=10)
	Returns (next_state, reward, done)
	"""
	s = self.scenario

	# Base reward: value-weighted rank score
	base_reward = 0.0
	for rank, idx in enumerate(action_indices):
	rank_fraction = rank / len(action_indices)
	base_reward += s.cap_business_values[idx] * (1.0 - rank_fraction)
	base_reward /= len(action_indices) # normalise to 0..1

	# Dependency penalty
	dep_violations = 0
	for i, dep_i in enumerate(action_indices):
	for j, dep_j in enumerate(action_indices):
	if s.dependency_matrix[dep_j, dep_i] == 1.0 and j < i:
	dep_violations += 1
	dep_penalty = dep_violations * 0.15

	# Budget penalty — cumulative effort of top-N capped by budget
	cum_effort = 0.0
	budget_penalty = 0.0
	for idx in action_indices:
	cum_effort += s.cap_effort_scores[idx] / 10.0
	if cum_effort > s.budget_capacity:
	budget_penalty += 0.05

	# Risk penalty — high-risk caps in top-3 positions
	risk_penalty = 0.0
	for idx in action_indices[:3]:
	if s.cap_risk_scores[idx] > s.risk_tolerance:
	risk_penalty += s.cap_risk_scores[idx] * 0.2

	reward = float(base_reward - dep_penalty - budget_penalty - risk_penalty)
	reward = max(-1.0, min(2.0, reward))

	self.current_step += 1
	done = True # single-step environment (one full ordering per episode)
	next_state = self.get_state_vector()
	return next_state, reward, done

	def sample_action(self) -> np.ndarray:
	"""Random action for baseline / exploration."""
	return self._rng.permutation(self.ACTION_DIM).astype(np.int64)