Spaces:

Jaswanth-K
/

cloudsense

Sleeping

App Files Files Community

cloudsense / env /environment.py

Jaswanth1210

Initial commit

529b5a7 2 months ago

raw

history blame contribute delete

20.4 kB

	"""CloudSense RL environment — manages state, steps, and episode lifecycle."""

	import copy
	import json
	from pathlib import Path

	from env.models import ActionType, CloudObservation, StepResult
	from env.reward import compute_reward
	from env.tasks import TASKS
	from env.graders.grader import grade_task

	# Bundled data files live in a "data" directory next to this module.
	DATA_DIR = Path(__file__).parent.joinpath("data")
	PRICING_FILE = DATA_DIR.joinpath("aws_pricing.json")

	# --- schedule_uptime cost multipliers (fraction of the bill still paid) ---
	# Running Mon-Fri only: 5 of every 7 days are billed (~0.714).
	WEEKDAY_ONLY_FACTOR = 5 / 7
	# Running 10 hours a day instead of 24 (~0.417).
	BUSINESS_HOURS_FACTOR = 10 / 24
	# Mon-Fri AND 10 hours a day: the product of the two factors above (~0.298).
	WEEKDAY_BUSINESS_HOURS = WEEKDAY_ONLY_FACTOR * BUSINESS_HOURS_FACTOR

	# --- cost multipliers for the other non-rightsize actions ---
	# S3 lifecycle policy (transition to IA): keeps ~30% of the cost.
	LIFECYCLE_POLICY_FACTOR = 0.30
	# Autoscaling: keeps ~80% of the cost.
	AUTOSCALING_FACTOR = 0.80
	# Reservation: keeps ~70% of the on-demand cost.
	RESERVATION_FACTOR = 0.70

	# Load the pricing table once, at import time. The encoding is pinned to
	# UTF-8 because open() otherwise decodes with the platform's locale
	# encoding, which is not guaranteed to be UTF-8.
	with open(PRICING_FILE, encoding="utf-8") as f:
	    PRICING = json.load(f)


	class CloudSenseEnv:
	def __init__(self):
	self.task = None
	self.task_id = None
	self.current_resources: list[dict] = []
	self.original_resources: list[dict] = []
	self.actions_history: list[dict] = []
	self.original_cost: float = 0.0
	self.current_cost: float = 0.0
	self.step_count: int = 0
	self.done: bool = False
	self.last_blast_radius: dict \| None = None
	self.last_reward: float = 0.0
	self.last_action_error: str \| None = None

	def reset(self, task_id: str) -> CloudObservation:
	    """Start a fresh episode for ``task_id`` and return its first observation.

	    Raises:
	        ValueError: If ``task_id`` is not a key of TASKS.
	    """
	    if (task_factory := TASKS.get(task_id)) is None:
	        raise ValueError(f"Unknown task: {task_id}. Available: {list(TASKS.keys())}")

	    self.task, self.task_id = task_factory(), task_id

	    account = self.task.load_account_raw()
	    # Two independent deep copies: actions mutate the first one in place,
	    # the second is stored separately.
	    self.current_resources = copy.deepcopy(account)
	    self.original_resources = copy.deepcopy(account)
	    self.actions_history = []

	    baseline_cost = sum(entry["monthly_cost"] for entry in self.current_resources)
	    self.original_cost = self.current_cost = baseline_cost

	    self.step_count = 0
	    self.done = False
	    self.last_blast_radius = self.last_action_error = None
	    self.last_reward = 0.0

	    return self._make_observation()

	def step(self, action: dict) -> StepResult:
	    """Execute one action and return the resulting StepResult.

	    Every call consumes one step, including calls whose action is rejected
	    (unknown action_type, unknown resource_id, or an action that does not
	    apply to the resource). A rejected action leaves costs and the action
	    history untouched, yields a reward of 0.0 and reports the reason via
	    ``last_action_error``. Termination is re-evaluated on every path, so a
	    run of rejected actions still ends the episode at ``max_steps``.

	    Args:
	        action: Mapping read for ``action_type`` and ``resource_id``; the
	            whole mapping is forwarded to the validation, blast-radius,
	            cost and reward helpers.

	    Returns:
	        StepResult whose ``info`` holds ``blast_radius`` and, once the
	        episode is done, ``task_score``.

	    Raises:
	        RuntimeError: If reset() has not been called, or the episode is
	            already done.
	    """
	    if self.task is None:
	        raise RuntimeError("Environment not initialized. Call reset() first.")
	    if self.done:
	        raise RuntimeError("Episode is done. Call reset() to start a new one.")

	    self.step_count += 1
	    self.last_action_error = None

	    action_type = action.get("action_type", "")
	    resource_id = action.get("resource_id", "")

	    valid_types = {a.value for a in ActionType}
	    if action_type not in valid_types:
	        return self._reject_action(f"Invalid action_type: {action_type}")

	    # First resource with a matching id, or None.
	    resource = next(
	        (r for r in self.current_resources if r["resource_id"] == resource_id),
	        None,
	    )
	    if resource is None:
	        return self._reject_action(f"Resource not found: {resource_id}")

	    error = self._validate_action_for_resource(action_type, resource, action)
	    if error:
	        return self._reject_action(error)

	    # Blast radius is computed BEFORE the action mutates the resource.
	    blast_radius = self._compute_blast_radius(action, resource)

	    cost_before = self.current_cost
	    self._apply_action(action, resource)
	    cost_after = sum(r["monthly_cost"] for r in self.current_resources)
	    self.current_cost = cost_after

	    # raw_reward is stored in the history (passed to the grader); api_reward
	    # is what the caller sees. This action's own blast radius is passed in.
	    raw_reward, api_reward = compute_reward(
	        action=action,
	        resource=resource,
	        task=self.task,
	        account_state=self.current_resources,
	        actions_history=self.actions_history,
	        original_cost=self.original_cost,
	        current_cost=cost_before,
	        cost_after_action=cost_after,
	        blast_radius=blast_radius,
	    )

	    self.actions_history.append({**action, "_raw_reward": raw_reward})
	    self.last_blast_radius = blast_radius
	    self.last_reward = api_reward

	    return self._finish_step(api_reward, {"blast_radius": blast_radius})

	def _reject_action(self, message: str) -> StepResult:
	    """Record a rejected action and return its zero-reward StepResult."""
	    self.last_action_error = message
	    # Keep the observation's last_reward in line with the 0.0 reward returned.
	    self.last_reward = 0.0
	    return self._finish_step(0.0, {})

	def _finish_step(self, reward: float, info: dict) -> StepResult:
	    """Re-evaluate termination, attach the final grade when done, build the result."""
	    self.done = self._is_done()
	    if self.done:
	        info["task_score"] = grade_task(
	            self.task_id,
	            self.actions_history,
	            self.state(),
	            {},
	        )
	    return self._make_step_result(reward, info)

	def state(self) -> dict:
	"""Return full current environment state."""
	return {
	"task_id": self.task_id,
	"resources": copy.deepcopy(self.current_resources),
	"original_cost": self.original_cost,
	"current_cost": self.current_cost,
	"step_count": self.step_count,
	"max_steps": self.task.max_steps if self.task else 0,
	"done": self.done,
	"actions_history": list(self.actions_history),
	}

	def close(self):
	"""Clean up environment state."""
	self.task = None
	self.task_id = None
	self.current_resources = []
	self.original_resources = []
	self.actions_history = []
	self.original_cost = 0.0
	self.current_cost = 0.0
	self.step_count = 0
	self.done = False
	self.last_blast_radius = None
	self.last_reward = 0.0
	self.last_action_error = None

	def _make_observation(self) -> CloudObservation:
	    """Build a CloudObservation from the current state.

	    ``total_possible_savings`` is the sum of the task's action savings (0
	    when no task is loaded). ``monthly_cost_optimized`` is measured from the
	    ORIGINAL cost, the same baseline _is_done() uses for realized savings.
	    Subtracting from the current cost would count already-realized savings
	    twice and push the target lower after every successful action.
	    """
	    action_savings = self.task.get_action_savings() if self.task else {}
	    total_possible_savings = sum(action_savings.values())

	    return CloudObservation(
	        task_id=self.task_id or "",
	        goal=self.task.description if self.task else "",
	        account_id=f"account-{self.task_id}",
	        resources=copy.deepcopy(self.current_resources),
	        monthly_cost_current=self.current_cost,
	        monthly_cost_optimized=self.original_cost - total_possible_savings,
	        total_possible_savings=total_possible_savings,
	        # Deep copy so a nested new_config is not shared with the history.
	        actions_taken=copy.deepcopy(self.actions_history),
	        warnings=[],
	        step_number=self.step_count,
	        max_steps=self.task.max_steps if self.task else 0,
	        last_reward=self.last_reward,
	        last_action_error=self.last_action_error,
	        info={"blast_radius": self.last_blast_radius} if self.last_blast_radius else {},
	    )

	def _make_step_result(self, reward: float, info: dict | None = None) -> StepResult:
	    """Wrap the current observation, ``reward`` and ``info`` in a StepResult.

	    ``info`` always ends up with a ``blast_radius`` entry: the caller's own
	    value if present, otherwise the last recorded blast radius, otherwise
	    an empty "none" record. A caller-supplied dict is updated in place.
	    """
	    observation = self._make_observation()
	    payload = {} if info is None else info
	    payload.setdefault(
	        "blast_radius",
	        self.last_blast_radius
	        or {"affected_resources": [], "risk_level": "none", "explanation": ""},
	    )
	    return StepResult(
	        observation=observation,
	        reward=reward,
	        done=self.done,
	        info=payload,
	    )

	def _validate_action_for_resource(
	    self, action_type: str, resource: dict, action: dict | None = None
	) -> str | None:
	    """Return an error message if the action cannot be applied, else None.

	    Args:
	        action_type: Value of an ActionType member.
	        resource: Resource dict; only ``resource_type`` is read.
	        action: Full action mapping. When omitted, the ``new_config``
	            checks are skipped.

	    Returns:
	        A human-readable rejection reason, or None when the action is valid.
	    """
	    rtype = resource.get("resource_type", "")

	    if action_type == ActionType.add_lifecycle_policy.value and rtype != "s3":
	        return f"add_lifecycle_policy only applies to S3, not {rtype}"
	    if action_type == ActionType.change_storage_class.value and rtype != "s3":
	        return f"change_storage_class only applies to S3, not {rtype}"
	    if action_type == ActionType.enable_autoscaling.value and rtype not in ("ec2", "kubernetes"):
	        return f"enable_autoscaling only applies to EC2/K8s, not {rtype}"
	    if action_type == ActionType.rightsize_resource.value:
	        if rtype in ("s3", "ebs", "eip", "nat_gateway", "load_balancer"):
	            return f"rightsize_resource does not apply to {rtype}"
	        if action and not action.get("new_config"):
	            return "rightsize_resource requires new_config with target configuration"
	    if action_type == ActionType.purchase_reservation.value:
	        eligible = ("ec2", "rds", "elasticsearch", "kubernetes")
	        if rtype not in eligible:
	            return f"purchase_reservation only applies to {', '.join(eligible)}, not {rtype}"

	    # The cost helpers call .get() on new_config, so a string or list here
	    # would raise later. Reject it as an ordinary invalid action instead.
	    if action is not None:
	        new_config = action.get("new_config")
	        if new_config is not None and not isinstance(new_config, dict):
	            return "new_config must be an object (dict) when provided"

	    return None

	def _apply_action(self, action: dict, resource: dict):
	    """Simulate the cost impact of an action by mutating ``resource`` in place.

	    Each discount is applied at most once per resource. The state written to
	    ``current_config`` (``lifecycle_policy``, ``storage_class``, ``schedule``,
	    ``autoscaling``, ``reservation_status``) doubles as the "already
	    applied" marker, so repeating the same action cannot compound the
	    multiplier. A terminated resource is never modified again.

	    Args:
	        action: Action mapping; ``action_type`` and ``new_config`` are read.
	        resource: Resource dict whose ``monthly_cost`` and
	            ``current_config`` are updated.
	    """
	    action_type = action.get("action_type", "")
	    new_config = action.get("new_config") or {}

	    # skip_resource and request_more_info never change cost or config.
	    if action_type in (ActionType.skip_resource.value, ActionType.request_more_info.value):
	        return

	    config = resource.setdefault("current_config", {})
	    if config.get("terminated"):
	        # Nothing left to optimize. In particular, rightsizing must not
	        # give a terminated resource a non-zero cost again.
	        return

	    if action_type == ActionType.terminate_resource.value:
	        resource["monthly_cost"] = 0.0
	        config["terminated"] = True

	    elif action_type == ActionType.rightsize_resource.value:
	        if new_config:
	            # Price against the pre-update config, then merge the new one.
	            new_cost = self._compute_new_cost(resource, new_config)
	            if new_cost is not None:
	                resource["monthly_cost"] = new_cost
	            config.update(new_config)

	    elif action_type == ActionType.add_lifecycle_policy.value:
	        if config.get("lifecycle_policy") != "transition_to_ia_30d":
	            resource["monthly_cost"] = round(resource["monthly_cost"] * LIFECYCLE_POLICY_FACTOR, 2)
	            config["lifecycle_policy"] = "transition_to_ia_30d"

	    elif action_type == ActionType.change_storage_class.value:
	        target = new_config.get("storage_class", "GLACIER_DEEP_ARCHIVE")
	        if config.get("storage_class") != target:
	            # Multiplier per target class. STANDARD and unknown classes
	            # leave the cost unchanged but are still recorded.
	            multipliers = {
	                "GLACIER_DEEP_ARCHIVE": 0.04,
	                "GLACIER_INSTANT": 0.17,
	                "INFREQUENT_ACCESS": 0.54,
	            }
	            factor = multipliers.get(target) if isinstance(target, str) else None
	            if factor is not None:
	                resource["monthly_cost"] = round(resource["monthly_cost"] * factor, 2)
	            config["storage_class"] = target

	    elif action_type == ActionType.schedule_uptime.value:
	        # Anything other than "business_hours" is treated as weekday-only.
	        requested = new_config.get("schedule", "weekday_only")
	        pattern = "business_hours" if requested == "business_hours" else "weekday_only"
	        if config.get("schedule") != pattern:
	            factor = WEEKDAY_BUSINESS_HOURS if pattern == "business_hours" else WEEKDAY_ONLY_FACTOR
	            resource["monthly_cost"] = round(resource["monthly_cost"] * factor, 2)
	            # Recorded like every other action's state, so a repeat is detectable.
	            config["schedule"] = pattern

	    elif action_type == ActionType.enable_autoscaling.value:
	        if config.get("autoscaling") is not True:
	            resource["monthly_cost"] = round(resource["monthly_cost"] * AUTOSCALING_FACTOR, 2)
	            config["autoscaling"] = True

	    elif action_type == ActionType.purchase_reservation.value:
	        if config.get("reservation_status") != "reserved":
	            resource["monthly_cost"] = round(resource["monthly_cost"] * RESERVATION_FACTOR, 2)
	            config["reservation_status"] = "reserved"

	def _compute_new_cost(self, resource: dict, new_config: dict) -> float | None:
	    """Compute the monthly cost of ``resource`` after rightsizing.

	    Fields missing from ``new_config`` are inherited from the resource's
	    current config wherever a fallback exists: RDS ``storage_gb``,
	    Elasticsearch ``instance_type``/``node_count`` and Kubernetes
	    ``node_type``/``node_count``. Without this, a request that changes only
	    the instance type of a multi-node cluster would be priced as one node.

	    Args:
	        resource: Resource dict; ``resource_type`` and ``current_config``
	            are read.
	        new_config: Target configuration supplied with the action.

	    Returns:
	        The new cost rounded to 2 decimals, or None when the resource type
	        is not priced here or the pricing table has no matching entry.
	    """
	    rtype = resource.get("resource_type", "")
	    # NOTE(review): assumes current_config uses the same keys as new_config
	    # (new_config is merged into it after rightsizing) — confirm against data.
	    current = resource.get("current_config") or {}
	    new_instance = new_config.get("instance_type", "")

	    if rtype == "ec2" and new_instance:
	        cost = PRICING.get("ec2", {}).get(new_instance)
	        if cost is not None:
	            return round(cost, 2)

	    elif rtype == "rds" and new_instance:
	        instance_cost = PRICING.get("rds", {}).get(new_instance)
	        if instance_cost is not None:
	            storage_gb = new_config.get("storage_gb", current.get("storage_gb", 0))
	            storage_cost = storage_gb * PRICING["ebs"]["gp2_per_gb"]
	            return round(instance_cost + storage_cost, 2)

	    elif rtype == "elasticsearch":
	        instance = new_instance or current.get("instance_type", "")
	        if instance:
	            node_cost = PRICING.get("elasticsearch", {}).get(instance)
	            if node_cost is not None:
	                node_count = new_config.get("node_count", current.get("node_count", 1))
	                return round(node_cost * node_count, 2)

	    elif rtype == "kubernetes":
	        node_type = new_config.get("node_type") or current.get("node_type", "")
	        node_count = new_config.get("node_count", current.get("node_count", 1))
	        # Pricing keys look like "per_node_<type with dots as underscores>".
	        key = f"per_node_{str(node_type).replace('.', '_')}"
	        per_node = PRICING.get("kubernetes", {}).get(key)
	        if per_node is not None:
	            base = PRICING["kubernetes"]["cluster_base"]
	            return round(base + per_node * node_count, 2)

	    return None

	def _compute_blast_radius(self, action: dict, resource: dict) -> dict:
	    """Compute the cascading impact of an action.

	    Terminate and rightsize actions walk the dependency graph transitively:
	    if ELB -> EC2 -> RDS, acting on the ELB reports BOTH EC2 and RDS.
	    Terminating a NAT gateway also affects every other resource in its
	    subnet. Storage actions only flag critical resources. All other actions
	    have no blast radius.

	    Args:
	        action: Action mapping; ``action_type`` and ``resource_id`` are read.
	        resource: The resource the action targets.

	    Returns:
	        Dict with ``affected_resources`` (ids, breadth-first order),
	        ``risk_level`` ("none", "low", "medium", "high" or "critical") and
	        ``explanation``.
	    """
	    action_type = action.get("action_type", "")
	    resource_id = action.get("resource_id", "")

	    # Skip/info actions have no blast radius.
	    if action_type in (ActionType.skip_resource.value, ActionType.request_more_info.value):
	        return {"affected_resources": [], "risk_level": "none", "explanation": ""}

	    affected: list = []
	    risk_level = "none"
	    explanation = ""

	    if action_type == ActionType.terminate_resource.value:
	        affected = self._transitive_dependents(resource_id)

	        # A NAT gateway also affects everything else in its subnet.
	        if resource.get("resource_type") == "nat_gateway":
	            subnet = resource.get("subnet")
	            if subnet:
	                already = set(affected)
	                for r in self.current_resources:
	                    rid = r["resource_id"]
	                    if r.get("subnet") == subnet and rid != resource_id and rid not in already:
	                        already.add(rid)
	                        affected.append(rid)

	        impacted = set(affected)
	        has_critical = any(
	            r.get("is_critical") for r in self.current_resources
	            if r["resource_id"] in impacted
	        )
	        has_prod = any(
	            r.get("environment") == "prod" for r in self.current_resources
	            if r["resource_id"] in impacted
	        )

	        # The first matching rule wins, so order matters.
	        if not affected:
	            risk_level = "none"
	            explanation = "No dependent resources affected."
	        elif resource.get("resource_type") == "nat_gateway" and has_critical:
	            risk_level = "critical"
	            explanation = (
	                f"Terminating this NAT Gateway will cut internet access for "
	                f"{len(affected)} resources in subnet {resource.get('subnet')}."
	            )
	        elif has_prod:
	            risk_level = "critical"
	            explanation = f"Terminating this resource affects {len(affected)} resources including production."
	        elif has_critical:
	            risk_level = "high"
	            explanation = f"Terminating this resource affects {len(affected)} resources including critical ones."
	        elif resource.get("resource_type") == "load_balancer":
	            risk_level = "high"
	            explanation = (
	                f"Terminating this ELB will disconnect {len(affected)} "
	                f"instances that route through it."
	            )
	        elif len(affected) > 2:
	            risk_level = "high"
	            explanation = f"Terminating this resource affects {len(affected)} dependent resources."
	        else:
	            risk_level = "medium"
	            explanation = f"Terminating this resource affects {len(affected)} dependent resource(s)."

	    elif action_type == ActionType.rightsize_resource.value:
	        # Same traversal as terminate, but the service keeps running, so
	        # the risk is rated one tier lower.
	        affected = self._transitive_dependents(resource_id)

	        if affected:
	            impacted = set(affected)
	            has_critical = any(
	                r.get("is_critical") for r in self.current_resources
	                if r["resource_id"] in impacted
	            )
	            if has_critical:
	                risk_level = "high"
	                explanation = f"Rightsizing affects {len(affected)} dependent resource(s) including critical ones."
	            elif len(affected) > 2:
	                risk_level = "medium"
	                explanation = f"Rightsizing affects {len(affected)} dependent resource(s)."
	            else:
	                risk_level = "medium" if resource.get("resource_type") == "rds" else "low"
	                explanation = f"Rightsizing affects {len(affected)} dependent resource(s)."

	    elif action_type in (ActionType.add_lifecycle_policy.value, ActionType.change_storage_class.value):
	        # Storage changes have no dependents; only criticality is flagged.
	        risk_level = "low" if resource.get("is_critical") else "none"
	        if risk_level == "low":
	            explanation = "Storage class change on a critical resource — verify access patterns."

	    return {
	        "affected_resources": affected,
	        "risk_level": risk_level,
	        "explanation": explanation,
	    }

	def _transitive_dependents(self, resource_id: str) -> list:
	    """Return ids of all resources that depend, directly or transitively, on ``resource_id``.

	    Ids come back in breadth-first order. Within one level they follow the
	    order of ``self.current_resources``. The starting resource is never
	    included, even when the dependency graph contains a cycle back to it.
	    """
	    # Reverse index, built once: dependency id -> ids of resources listing it.
	    dependents_of: dict = {}
	    for r in self.current_resources:
	        for dependency in r.get("dependencies") or []:
	            dependents_of.setdefault(dependency, []).append(r["resource_id"])

	    seen = {resource_id}  # seeding with the start id is what breaks cycles
	    ordered: list = []
	    frontier = [resource_id]
	    cursor = 0  # index-based queue; avoids O(n) list.pop(0)
	    while cursor < len(frontier):
	        for rid in dependents_of.get(frontier[cursor], ()):
	            if rid not in seen:
	                seen.add(rid)
	                ordered.append(rid)
	                frontier.append(rid)  # follow the chain
	        cursor += 1
	    return ordered

	def _is_done(self) -> bool:
	"""Check if episode should end."""
	if self.task is None:
	return True
	if self.step_count >= self.task.max_steps:
	return True

	# All resources have been actioned
	actioned_ids = {a.get("resource_id") for a in self.actions_history}
	all_ids = {r["resource_id"] for r in self.current_resources}
	if all_ids and actioned_ids >= all_ids:
	return True

	# >95% of possible savings achieved
	action_savings = self.task.get_action_savings()
	total_possible = sum(action_savings.values())
	actual_savings = self.original_cost - self.current_cost
	if total_possible > 0 and actual_savings / total_possible > 0.95:
	return True

	return False