# Controller / utilities / policy.py
# Source: Gen-HVAC upload ("Upload 6 files", commit ba7b0bc, verified)
# unihvac/policy.py
from __future__ import annotations

import json
import os
from typing import Any, Dict, Tuple

import numpy as np
import requests
import torch
import torch.nn.functional as F
class RemoteHTTPPolicy:
    """
    Policy that delegates action selection to a remote HTTP inference server.

    Endpoints used:
      POST {server_url}/predict -> {"action": [...]} (one action per request)
      POST {server_url}/reset   -> clears the server-side context buffer

    On any network/HTTP failure, ``act`` falls back to a constant setpoint
    schedule so the calling simulation can keep running.
    """

    # (connect, read) timeouts in seconds for every server call.
    # BUG FIX: the original requests.post calls had no timeout at all, so a
    # hung/unreachable server would block the control loop indefinitely.
    REQUEST_TIMEOUT = (5.0, 30.0)

    def __init__(self, server_url: str = "http://host.docker.internal:8000"):
        self.server_url = server_url
        self.predict_endpoint = f"{server_url}/predict"
        self.reset_endpoint = f"{server_url}/reset"
        print(f"[RemotePolicy] Connecting to {self.server_url}...")

    def reset(self):
        """Best-effort reset of the remote context buffer; never raises."""
        try:
            requests.post(
                self.reset_endpoint,
                json={"message": "reset"},
                timeout=self.REQUEST_TIMEOUT,
            )
            print("[RemotePolicy] Remote buffer reset.")
        except Exception as e:
            print(f"[RemotePolicy] Reset failed: {e}")

    def act(self, obs, info, step):
        """
        Request one action from the server for observation ``obs`` at ``step``.

        Returns:
            (action, {}, {}) where ``action`` is a float32 array; on any
            failure the safe fallback [21.0, 24.0] * 5 (htg/clg for five
            zones) is returned instead of raising.
        """
        obs_list = np.array(obs, dtype=np.float32).tolist()
        payload = {"step": int(step), "obs": obs_list, "info": {}}
        try:
            resp = requests.post(
                self.predict_endpoint,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            resp.raise_for_status()
            action = np.array(resp.json()["action"], dtype=np.float32)
            return action, {}, {}
        except Exception as e:
            print(f"[RemotePolicy] Error: {e}")
            return np.array([21.0, 24.0] * 5, dtype=np.float32), {}, {}
def _get_int_env(name: str, default: int) -> int:
try:
v = int(os.environ.get(name, str(default)))
return v
except Exception:
return default
def _get_bool_env(name: str, default: bool) -> bool:
v = os.environ.get(name, None)
if v is None:
return default
return v.strip().lower() in ("1", "true", "yes", "y", "on")
# --------------------------------------------------------------------------------------
# Policies
# --------------------------------------------------------------------------------------
class ConstantSetpointPolicy5Zone:
    """
    Rule-based baseline that always commands the same heating/cooling
    setpoints for all five zones.

    Action layout: [htg, clg] repeated five times (10 floats total).
    """

    def __init__(self, heating_sp: float = 21.0, cooling_sp: float = 24.0):
        self.heating_sp = float(heating_sp)
        self.cooling_sp = float(cooling_sp)
        pair = [self.heating_sp, self.cooling_sp]
        self.action = np.array(pair * 5, dtype=np.float32)

    def reset(self):
        # Stateless controller: nothing to clear between episodes.
        return

    def act(self, obs, info, step):
        # Hand back a fresh copy so callers cannot mutate our template.
        return self.action.copy(), {}, {}
class DecisionTransformerPolicy5Zone:
    """
    CPU-safe Decision Transformer (DT) policy with robust observation mapping
    and deadband protection.

    Per control step (see ``act``):
      1. Remap the raw env observation onto the model's state-key order.
      2. Normalize states/actions with training-time statistics.
      3. Build a context vector (weather, occupancy, cyclic time features).
      4. Push the tokenized step into a ring buffer of length ``context_len``.
      5. Run the transformer, sample action bins, decode to setpoints.
      6. Clamp setpoints and enforce a heating/cooling deadband per zone.
    """

    def __init__(
        self,
        ckpt_path: str,
        model_config_path: str,
        norm_stats_path: str,
        context_len: int,
        max_tokens_per_step: int,
        device: str = "cpu",
        temperature: float = 0.5,
    ):
        """
        Args:
            ckpt_path: torch checkpoint containing a "model" state dict.
            model_config_path: JSON config for GeneralistComfortDT.
            norm_stats_path: .npz with obs/act mean+std (optional max_return).
            context_len: transformer context window length L (timesteps).
            max_tokens_per_step: token slot budget K per timestep.
            device: accepted but ignored — inference is pinned to CPU below.
            temperature: softmax temperature for action-bin sampling.
        """
        # Project-local modules (not on PyPI): feature tokenization + model.
        import dataloader as dl
        from embeddings import GeneralistComfortDT
        # --- 1. CPU Settings ---
        # Inference only: no autograd; tune CPU attention/MKL-DNN backends.
        torch.set_grad_enabled(False)
        torch.backends.mha.set_fastpath_enabled(True)
        torch.backends.mkldnn.enabled = _get_bool_env("DT_MKLDNN", True)
        import multiprocessing
        avail = multiprocessing.cpu_count()
        # DT_NUM_THREADS env var overrides the default of min(18, cpu_count).
        dt_threads = _get_int_env("DT_NUM_THREADS", min(18, avail))
        torch.set_num_threads(dt_threads)
        torch.set_num_interop_threads(1)
        self.dl = dl
        self.device = torch.device("cpu")
        self.temperature = float(temperature)
        # --- 2. Load Model ---
        with open(model_config_path, "r") as f:
            cfg = json.load(f)
        cfg["CONTEXT_LEN"] = int(context_len)
        self.L = int(context_len)          # context window length (timesteps)
        self.K = int(max_tokens_per_step)  # token slots per timestep
        self.model = GeneralistComfortDT(cfg).to(self.device)
        ckpt = torch.load(ckpt_path, map_location="cpu")
        self.model.load_state_dict(ckpt["model"], strict=True)
        self.model.eval()
        # --- 3. Load Stats ---
        # Normalization statistics saved at training time (.npz archive).
        z = np.load(norm_stats_path)
        self.obs_mean = z["obs_mean"].astype(np.float32)
        self.obs_std = z["obs_std"].astype(np.float32)
        self.act_mean = z["act_mean"].astype(np.float32)
        self.act_std = z["act_std"].astype(np.float32)
        self.max_return = float(z["max_return"][0]) if "max_return" in z else 1.0
        self.rtg_scale_mode = "max_return"
        self.rtg_constant_div = 1.0
        # Raw return-to-go target fed to the model at every timestep.
        self.desired_rtg_raw = -0.5
        # Last commanded action; also the scaffold the decoder writes into.
        self.prev_action = np.array([21.0, 24.0] * 5, dtype=np.float32)
        # --- 4. Define Keys (The Fix) ---
        # Feature order of the raw environment observation vector (30 items).
        self.env_keys_order = [
            'month', 'day_of_month', 'hour',
            'outdoor_temp', 'core_temp', 'perim1_temp', 'perim2_temp', 'perim3_temp', 'perim4_temp',
            'elec_power',
            'core_occ_count', 'perim1_occ_count', 'perim2_occ_count', 'perim3_occ_count', 'perim4_occ_count',
            'outdoor_dewpoint', 'outdoor_wetbulb',
            'core_rh', 'perim1_rh', 'perim2_rh', 'perim3_rh', 'perim4_rh',
            'core_ash55_notcomfortable_summer', 'core_ash55_notcomfortable_winter', 'core_ash55_notcomfortable_any',
            'p1_ash55_notcomfortable_any', 'p2_ash55_notcomfortable_any', 'p3_ash55_notcomfortable_any', 'p4_ash55_notcomfortable_any',
            'total_electricity_HVAC'
        ]
        # State-feature order the model expects (28 items; drops day_of_month
        # and total_electricity_HVAC, moves month/hour to the end).
        self.model_state_keys = [
            'outdoor_temp', 'core_temp', 'perim1_temp', 'perim2_temp', 'perim3_temp', 'perim4_temp',
            'elec_power',
            'core_occ_count', 'perim1_occ_count', 'perim2_occ_count', 'perim3_occ_count', 'perim4_occ_count',
            'outdoor_dewpoint', 'outdoor_wetbulb',
            'core_rh', 'perim1_rh', 'perim2_rh', 'perim3_rh', 'perim4_rh',
            'core_ash55_notcomfortable_summer', 'core_ash55_notcomfortable_winter', 'core_ash55_notcomfortable_any',
            'p1_ash55_notcomfortable_any', 'p2_ash55_notcomfortable_any', 'p3_ash55_notcomfortable_any', 'p4_ash55_notcomfortable_any',
            'month', 'hour'
        ]
        # Env-vector index of each model key.
        # NOTE(review): obs_indices is computed here but act() remaps through
        # a key->value dict instead — this looks like legacy/diagnostic code;
        # confirm before removing.
        self.obs_indices = []
        for k in self.model_state_keys:
            try:
                self.obs_indices.append(self.env_keys_order.index(k))
            except ValueError:
                print(f"Key {k} missing")
                self.obs_indices.append(0)  # Fallback
        self.obs_indices = np.array(self.obs_indices, dtype=np.int64)
        # Action layout: (heating, cooling) per zone — core then perimeters 1-4.
        self.action_keys = [
            "htg_core", "clg_core", "htg_p1", "clg_p1", "htg_p2", "clg_p2",
            "htg_p3", "clg_p3", "htg_p4", "clg_p4",
        ]
        # Meta info: per-feature identity tuples from the dataloader (indexed
        # below as m[0]=feature id, m[1]=zone id — TODO confirm tuple layout
        # against dataloader.parse_feature_identity).
        self.s_meta = [self.dl.parse_feature_identity(k, is_action=False) for k in self.model_state_keys]
        self.a_meta = [self.dl.parse_feature_identity(k, is_action=True) for k in self.action_keys]
        # Fit tokens into the K-slot budget; action tokens are allocated first.
        self.num_act = min(len(self.a_meta), self.K)
        self.num_state = min(len(self.s_meta), self.K - self.num_act)
        # --- 5. Precompute Token Layouts ---
        # Static per-step token row: [state tokens | action tokens | padding].
        self.row_feat_ids = np.zeros((self.K,), dtype=np.int64)
        self.row_zone_ids = np.zeros((self.K,), dtype=np.int64)
        self.row_attn = np.zeros((self.K,), dtype=np.int64)
        self.row_feat_vals = np.zeros((self.K,), dtype=np.float32)
        if self.num_state > 0:
            s_meta = self.s_meta[:self.num_state]
            self.row_feat_ids[:self.num_state] = np.array([m[0] for m in s_meta], dtype=np.int64)
            self.row_zone_ids[:self.num_state] = np.array([m[1] for m in s_meta], dtype=np.int64)
            self.row_attn[:self.num_state] = 1
        if self.num_act > 0:
            start = self.num_state
            end = start + self.num_act
            a_meta = self.a_meta[:self.num_act]
            self.row_feat_ids[start:end] = np.array([m[0] for m in a_meta], dtype=np.int64)
            self.row_zone_ids[start:end] = np.array([m[1] for m in a_meta], dtype=np.int64)
            self.row_attn[start:end] = 1
        # Context Dimension from Config
        self.context_dim = cfg.get("CONTEXT_DIM", 10)
        # Buffers — ring storage over the last L observed timesteps.
        self.buf_feature_ids = torch.zeros((self.L, self.K), dtype=torch.long, device=self.device)
        self.buf_feature_vals = torch.zeros((self.L, self.K), dtype=torch.float32, device=self.device)
        self.buf_zone_ids = torch.zeros((self.L, self.K), dtype=torch.long, device=self.device)
        self.buf_attn = torch.zeros((self.L, self.K), dtype=torch.long, device=self.device)
        self.buf_rtg = torch.zeros((self.L,), dtype=torch.float32, device=self.device)
        # Inputs — time-ordered (batch=1) model tensors rebuilt from the ring.
        self.t_feature_ids = torch.zeros((1, self.L, self.K), dtype=torch.long, device=self.device)
        self.t_feature_vals = torch.zeros((1, self.L, self.K), dtype=torch.float32, device=self.device)
        self.t_zone_ids = torch.zeros((1, self.L, self.K), dtype=torch.long, device=self.device)
        self.t_attn = torch.zeros((1, self.L, self.K), dtype=torch.long, device=self.device)
        self.t_rtg = torch.zeros((1, self.L), dtype=torch.float32, device=self.device)
        self.ptr = 0     # next ring slot to overwrite
        self.filled = 0  # number of valid slots (saturates at L)
        # Context Buffer
        self.t_context = torch.zeros((1, self.context_dim), dtype=torch.float32, device=self.device)

    def reset(self):
        """Clear all history buffers and restore the default previous action."""
        self.buf_feature_ids.zero_()
        self.buf_feature_vals.zero_()
        self.buf_zone_ids.zero_()
        self.buf_attn.zero_()
        self.buf_rtg.zero_()
        self.t_feature_ids.zero_()
        self.t_feature_vals.zero_()
        self.t_zone_ids.zero_()
        self.t_attn.zero_()
        self.t_rtg.zero_()
        self.prev_action = np.array([21.0, 24.0] * 5, dtype=np.float32)
        self.ptr = 0
        self.filled = 0

    def _decode_bin_to_setpoint(self, bin_id: int, key: str) -> float:
        """Linearly map a discrete action bin onto the setpoint range that
        matches *key* (cooling keys use CLG range, everything else HTG)."""
        if "clg" in key.lower() or "cool" in key.lower():
            lo, hi = self.dl.CLG_LOW, self.dl.CLG_HIGH
        else:
            lo, hi = self.dl.HTG_LOW, self.dl.HTG_HIGH
        # bin 0 -> lo, bin (NUM_ACTION_BINS - 1) -> hi.
        x = float(bin_id) / float(self.dl.NUM_ACTION_BINS - 1)
        return lo + x * (hi - lo)

    def _scale_rtg(self, rtg_raw: float) -> float:
        """Scale the raw return-to-go per the configured mode."""
        if self.rtg_scale_mode == "max_return":
            scale = max(self.max_return, 1e-6)  # guard tiny/zero max_return
            return float(rtg_raw) / scale
        return float(rtg_raw) / float(self.rtg_constant_div)

    def _write_model_inputs_from_ring(self):
        """Unroll the ring buffer into time-ordered model input tensors.

        The window is right-aligned: while fewer than L steps have been seen,
        the leading slots remain zeroed (attention mask = 0).
        """
        if self.filled < self.L:
            start = self.L - self.filled
            self.t_feature_ids.zero_(); self.t_feature_vals.zero_()
            self.t_zone_ids.zero_(); self.t_attn.zero_(); self.t_rtg.zero_()
            self.t_feature_ids[0, start:].copy_(self.buf_feature_ids[: self.filled])
            self.t_feature_vals[0, start:].copy_(self.buf_feature_vals[: self.filled])
            self.t_zone_ids[0, start:].copy_(self.buf_zone_ids[: self.filled])
            self.t_attn[0, start:].copy_(self.buf_attn[: self.filled])
            self.t_rtg[0, start:].copy_(self.buf_rtg[: self.filled])
            return
        # Full ring: the oldest entry lives at ptr; rotate so the output runs
        # oldest-first (slots [ptr:] first, then the wrapped [:ptr] slots).
        p = self.ptr
        n1 = self.L - p
        self.t_feature_ids[0, :n1].copy_(self.buf_feature_ids[p:])
        self.t_feature_vals[0, :n1].copy_(self.buf_feature_vals[p:])
        self.t_zone_ids[0, :n1].copy_(self.buf_zone_ids[p:])
        self.t_attn[0, :n1].copy_(self.buf_attn[p:])
        self.t_rtg[0, :n1].copy_(self.buf_rtg[p:])
        self.t_feature_ids[0, n1:].copy_(self.buf_feature_ids[:p])
        self.t_feature_vals[0, n1:].copy_(self.buf_feature_vals[:p])
        self.t_zone_ids[0, n1:].copy_(self.buf_zone_ids[:p])
        self.t_attn[0, n1:].copy_(self.buf_attn[:p])
        self.t_rtg[0, n1:].copy_(self.buf_rtg[:p])

    def act(self, obs: Any, info: Dict[str, Any], step: int) -> Tuple[np.ndarray, Dict, Dict]:
        """Produce one action for the current observation.

        Returns:
            (action, {}, {}) where ``action`` is a float32 array of ten
            setpoints laid out as [htg, clg] per zone (see action_keys).
        """
        # --- 1. Map raw obs (30 items) -> model obs (28 items) by key name ---
        obs_raw = np.asarray(obs, dtype=np.float32)
        env_map = dict(zip(self.env_keys_order, obs_raw))
        obs_ordered = np.array([env_map.get(k, 0.0) for k in self.model_state_keys], dtype=np.float32)
        # --- 2. Normalization ---
        obs_norm = obs_ordered.copy()
        D = min(len(self.obs_mean), obs_norm.shape[0])
        eps = 1e-6
        obs_norm[:D] = (obs_norm[:D] - self.obs_mean[:D]) / (self.obs_std[:D] + eps)
        # =========================================================================
        # 3. CALCULATE CONTEXT VECTOR (Dynamic)
        # =========================================================================
        out_temp = env_map.get('outdoor_temp', 0.0)
        out_dew = env_map.get('outdoor_dewpoint', 0.0)
        hour = env_map.get('hour', 0.0)
        month = env_map.get('month', 1.0)
        occ_total = 0.0
        occ_keys = ['core_occ_count', 'perim1_occ_count', 'perim2_occ_count', 'perim3_occ_count', 'perim4_occ_count']
        for k in occ_keys:
            if env_map.get(k, 0.0) > 0.5:  # Binary occupancy check
                occ_total += 1.0
        occ_frac = occ_total / 5.0
        # Cyclic (sin/cos) encodings for hour-of-day and month-of-year.
        hr_sin = np.sin(2 * np.pi * hour / 24.0)
        hr_cos = np.cos(2 * np.pi * hour / 24.0)
        mth_norm = month - 1.0
        mth_sin = np.sin(2 * np.pi * mth_norm / 12.0)
        mth_cos = np.cos(2 * np.pi * mth_norm / 12.0)
        ctx_vec = np.array([
            out_temp, 0.0,   # Temp Mean, Temp Std
            out_dew,         # Dewpoint
            occ_frac,        # Occ Fraction
            hr_sin, hr_cos,  # Hour
            mth_sin, mth_cos,  # Month
            0.0, 0.0         # Spares
        ], dtype=np.float32)
        self.t_context[0].copy_(torch.from_numpy(ctx_vec))
        # Normalize the previous action (conditioning token values).
        # NOTE(review): unlike obs above, no eps is added to act_std here —
        # confirm act_std entries can never be zero.
        act_norm = self.prev_action.copy()
        A = min(len(self.act_mean), act_norm.shape[0])
        act_norm[:A] = (act_norm[:A] - self.act_mean[:A]) / self.act_std[:A]
        # Fill this step's token values: state slots first, then action slots.
        self.row_feat_vals.fill(0.0)
        if self.num_state > 0:
            self.row_feat_vals[: self.num_state] = obs_norm[: self.num_state]
        if self.num_act > 0:
            s, e = self.num_state, self.num_state + self.num_act
            if step < 5:
                # Warm-up: seed context with a known-reasonable action rather
                # than the possibly degenerate earliest predictions.
                good_action = np.array([22.0, 25.0] * 5, dtype=np.float32)
                good_norm = good_action.copy()
                A_len = min(len(self.act_mean), good_norm.shape[0])
                good_norm[:A_len] = (good_norm[:A_len] - self.act_mean[:A_len]) / self.act_std[:A_len]
                self.row_feat_vals[s:e] = good_norm[: self.num_act]
            else:
                self.row_feat_vals[s:e] = act_norm[: self.num_act]
        # Write this step into the ring buffer and advance the pointer.
        i = self.ptr
        self.buf_feature_ids[i].copy_(torch.as_tensor(self.row_feat_ids, dtype=torch.long))
        self.buf_zone_ids[i].copy_(torch.as_tensor(self.row_zone_ids, dtype=torch.long))
        self.buf_attn[i].copy_(torch.as_tensor(self.row_attn, dtype=torch.long))
        self.buf_feature_vals[i].copy_(torch.as_tensor(self.row_feat_vals, dtype=torch.float32))
        self.buf_rtg[i] = float(self._scale_rtg(self.desired_rtg_raw))
        self.ptr = (self.ptr + 1) % self.L
        self.filled = min(self.filled + 1, self.L)
        self._write_model_inputs_from_ring()
        # Forward pass — bf16 autocast on CPU, no autograd tracking.
        with torch.inference_mode():
            with torch.amp.autocast(device_type="cpu", dtype=torch.bfloat16):
                out = self.model(self.t_feature_ids, self.t_feature_vals, self.t_zone_ids, self.t_attn, rtg=self.t_rtg, context=self.t_context)
        logits = out["action_logits"]
        last = logits[0, -1]  # [K, n_bins] — logits for the newest timestep
        s, e = self.num_state, self.num_state + self.num_act
        temp = max(self.temperature, 1e-4)  # avoid divide-by-zero temperature
        raw_logits = last[s:e]
        # Sanitize non-finite logits before sampling.
        if torch.isnan(raw_logits).any() or torch.isinf(raw_logits).any():
            raw_logits = torch.nan_to_num(raw_logits, nan=0.0, posinf=10.0, neginf=-10.0)
        # 1. Apply Temperature
        action_logits = raw_logits / temp
        # 2. Convert to Probabilities
        action_probs = F.softmax(action_logits, dim=-1)  # [Num_Actions, n_bins]
        if torch.isnan(action_probs).any() or (action_probs < 0).any():
            # Degenerate distribution: fall back to uniform over the bins.
            action_probs = torch.ones_like(action_probs) / action_probs.size(-1)
        # 3. Sample from distribution
        try:
            pred_bins = torch.multinomial(action_probs, num_samples=1).flatten().cpu().numpy().astype(np.int64)
        except RuntimeError as err:
            # multinomial can reject ill-conditioned rows; fall back to argmax.
            pred_bins = torch.argmax(action_probs, dim=-1).cpu().numpy().astype(np.int64)
        # Decode bins to physical setpoints, then clamp to the legal ranges.
        action = self.prev_action.copy()
        for j in range(self.num_act):
            action[j] = self._decode_bin_to_setpoint(int(pred_bins[j]), self.action_keys[j])
        for j, k in enumerate(self.action_keys):
            if "clg" in k.lower():
                action[j] = float(np.clip(action[j], self.dl.CLG_LOW, self.dl.CLG_HIGH))
            else:
                action[j] = float(np.clip(action[j], self.dl.HTG_LOW, self.dl.HTG_HIGH))
        # Enforce a minimum heating/cooling separation per zone.
        DEADBAND_GAP = 3.0
        for z in range(5):
            h_idx = 2 * z
            c_idx = 2 * z + 1
            # First try raising the cooling setpoint (capped at CLG_HIGH)...
            if action[c_idx] < action[h_idx] + DEADBAND_GAP:
                action[c_idx] = min(self.dl.CLG_HIGH, action[h_idx] + DEADBAND_GAP)
            # ...and if the gap is still too small, lower heating instead.
            if action[c_idx] < action[h_idx] + DEADBAND_GAP:
                action[h_idx] = max(self.dl.HTG_LOW, action[c_idx] - DEADBAND_GAP)
        if step < 5 or step % 1000 == 0:
            # Periodic debug trace of raw bins and decoded core setpoints.
            print(f"[DT] Step {step} Raw Bins: {pred_bins}")
            h_val = self._decode_bin_to_setpoint(int(pred_bins[0]), "htg_core")
            c_val = self._decode_bin_to_setpoint(int(pred_bins[1]), "clg_core")
            print(f"[DT] Step {step} Decoded Core: Heat {h_val:.2f} | Cool {c_val:.2f}")
        self.prev_action = action
        return action, {}, {}
def make_policy(policy_type: str, **kwargs):
    """
    Factory for the controller policies defined in this module.

    Supported values of *policy_type* (case-insensitive, whitespace-trimmed):
      "dt"       -> DecisionTransformerPolicy5Zone (requires ckpt_path,
                    model_config_path, norm_stats_path, context_len,
                    max_tokens_per_step kwargs).
      "constant" -> ConstantSetpointPolicy5Zone (optional heating_sp,
                    cooling_sp kwargs).
      "remote"   -> RemoteHTTPPolicy (optional server_url kwarg).

    Raises:
        ValueError: for any other value (including None/empty).
    """
    policy_type = (policy_type or "").lower().strip()
    if policy_type == "dt":
        return DecisionTransformerPolicy5Zone(
            ckpt_path=kwargs["ckpt_path"],
            model_config_path=kwargs["model_config_path"],
            norm_stats_path=kwargs["norm_stats_path"],
            context_len=kwargs["context_len"],
            max_tokens_per_step=kwargs["max_tokens_per_step"],
            device=kwargs.get("device", "cpu"),
            temperature=kwargs.get("temperature", 0.8),
        )
    # CONSISTENCY FIX: the module defines two more policies that the factory
    # previously could not construct; dispatch them here (backward-compatible).
    if policy_type == "constant":
        return ConstantSetpointPolicy5Zone(
            heating_sp=kwargs.get("heating_sp", 21.0),
            cooling_sp=kwargs.get("cooling_sp", 24.0),
        )
    if policy_type == "remote":
        return RemoteHTTPPolicy(
            server_url=kwargs.get("server_url", "http://host.docker.internal:8000"),
        )
    raise ValueError(f"Unknown policy_type={policy_type}.")