saravanatanjiro committed on
Commit
184ea7c
·
1 Parent(s): 92bd6fe

Removed training loops below render function

Browse files
Files changed (1) hide show
  1. cloud_arena_final.py +0 -656
cloud_arena_final.py CHANGED
@@ -943,659 +943,3 @@ class CloudArenaEnv(gym.Env):
943
  def render(self): pass
944
 
945
 
946
- # ── Gymnasium wrapper ─────────────────────────────────────────────────────────
947
-
948
- from sb3_contrib.common.wrappers import ActionMasker
949
-
950
def get_action_masks(env) -> np.ndarray:
    """Return the action mask of the innermost environment.

    Follows the chain of ``.env`` attributes until an object without one is
    reached, then calls that object's ``action_masks()`` method.
    """
    current = env
    while True:
        try:
            current = current.env
        except AttributeError:
            # No further wrapper layer: ``current`` is the innermost env.
            break
    return current.action_masks()
956
-
957
-
958
- # ── Sanity checks ─────────────────────────────────────────────────────────────
959
-
960
- print("=" * 60)
961
- print(" CLOUD ARENA — ENVIRONMENT SANITY CHECKS")
962
- print("=" * 60)
963
- print(f" OBS_DIM = {OBS_DIM}")
964
- print(f" N_ACTIONS = {N_ACTIONS}")
965
-
966
- _cr = [0]; _gr = [0]
967
- _e = ActionMasker(CloudArenaEnv(_cr, _gr), get_action_masks)
968
- _obs, _ = _e.reset()
969
- assert _obs.shape == (OBS_DIM,), f"Bad obs shape: {_obs.shape}"
970
- print(f"\n ✅ Obs shape : {_obs.shape}")
971
-
972
- _mask = _e.env.action_masks()
973
- assert _mask.shape == (N_ACTIONS,), f"Bad mask: {_mask.shape}"
974
- print(f" ✅ Mask shape : {_mask.shape}, {_mask.sum()} valid actions")
975
-
976
- # NOOP must be penalized when system is degraded
977
- _cr[0] = 0
978
- _e2 = ActionMasker(CloudArenaEnv(_cr, [0]), get_action_masks)
979
- _e2.reset()
980
- # Manually degrade a resource to force NOOP penalty
981
- _e2.env.resources[1].risk_score = 0.8
982
- _, r_noop, _, _, _ = _e2.step(A_NOOP * MAX_RESOURCES)
983
- print(f" ✅ NOOP (degraded): reward={r_noop:.3f} (should be negative)")
984
-
985
# Veto: RESIZE_DOWN on a non-overprovisioned resource
_e3 = ActionMasker(CloudArenaEnv([0], [0]), get_action_masks)
_e3.reset()
_e3.env.resources[0].allocated = 0.30
_e3.env.resources[0].usage = 0.28 # not overprovisioned
_, r_v, _, _, i_v = _e3.step(A_RESIZE_DOWN * MAX_RESOURCES)
# Format the veto rate only when it is present: applying ':.2f' to the
# '?' placeholder would raise ValueError instead of printing the fallback.
_veto = i_v.get('veto_rate')
_veto_txt = f"{_veto:.2f}" if _veto is not None else "?"
print(f" ✅ Veto test : veto={_veto_txt}, r={r_v:.3f}")
992
-
993
- # Phase 0 win condition reachability
994
- _e4 = ActionMasker(CloudArenaEnv([0], [0]), get_action_masks)
995
- _e4.reset()
996
- ic = _e4.env.initial_total_cost
997
- wt = WIN_COST_THR[0]
998
- print(f"\n Phase 0 win target: cost < {ic * wt:.3f} (initial={ic:.3f})")
999
- print(f" Threshold is {wt*100:.0f}% of initial — requires ~{(1-wt)*100:.0f}% cost reduction")
1000
-
1001
- print("\n 8-step walkthrough:")
1002
- _e5 = ActionMasker(CloudArenaEnv([0], [0]), get_action_masks)
1003
- _e5.reset()
1004
- _actions_to_test = [
1005
- A_ANALYZE * MAX_RESOURCES, # analyze resource 0
1006
- A_ANALYZE * MAX_RESOURCES + 1, # analyze resource 1
1007
- A_RESIZE_DOWN * MAX_RESOURCES + 1,# resize down resource 1
1008
- A_RESIZE_DOWN * MAX_RESOURCES + 2,# resize down resource 2
1009
- A_STOP * MAX_RESOURCES + 3, # stop resource 3 (LOW crit)
1010
- A_NOOP * MAX_RESOURCES, # noop
1011
- A_RESIZE_DOWN * MAX_RESOURCES + 1,# resize down again (should veto)
1012
- A_NOOP * MAX_RESOURCES, # noop
1013
- ]
1014
- for i, a in enumerate(_actions_to_test):
1015
- _, r, t, tr, inf = _e5.step(a)
1016
- atype = a // MAX_RESOURCES
1017
- ridx = a % MAX_RESOURCES
1018
- print(f" {i+1}. {['NOOP','ANALYZE','VRF_DEP','RESIZE_DN','RESIZE_UP','STOP','RESTART','DELETE','PATCH','ENCRYPT','RESTRICT','ROT_CRED','EN_LOG','ARCHIVE','OPT_NET'][atype]:<12}"
1019
- f" r{ridx} reward={r:+.3f} win={inf.get('win',0)} savings={inf.get('savings_pct',0):.1f}%")
1020
-
1021
- print("\n ✅ All sanity checks passed — proceed to Cell 3.\n")
1022
-
1023
- # ══════════════════════════════════════════════════════════════════════════════
1024
- # CELL 3 — TRAINING (FINAL FIXED VERSION)
1025
- # ══════════════════════════════════════════════════════════════════════════════
1026
-
1027
- import os, torch, math, sys
1028
- import numpy as np
1029
- import matplotlib.pyplot as plt
1030
-
1031
- from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize, sync_envs_normalization
1032
- from stable_baselines3.common.callbacks import BaseCallback
1033
- from stable_baselines3.common.monitor import Monitor
1034
-
1035
- from sb3_contrib import MaskablePPO
1036
- from sb3_contrib.common.maskable.callbacks import MaskableEvalCallback
1037
-
1038
- torch.manual_seed(GLOBAL_SEED)
1039
-
1040
- for d in ["./logs/", "./eval_logs/", "./models/"]:
1041
- os.makedirs(d, exist_ok=True)
1042
-
1043
- TOTAL_TIMESTEPS = 500_000
1044
- _curriculum_ref = [0]
1045
- _global_step_ref = [0]
1046
-
1047
- # ================================
1048
- # LR SCHEDULE
1049
- # ================================
1050
def cosine_lr(progress_remaining: float, init_lr: float = 3e-4, min_lr: float = 5e-5):
    """Cosine-annealed learning rate as a function of remaining progress.

    Args:
        progress_remaining: Fraction of training left, 1.0 at the start and
            0.0 at the end. Values outside [0, 1] are clamped.
        init_lr: Learning rate returned when ``progress_remaining`` is 1.
        min_lr: Learning rate returned when ``progress_remaining`` is 0.

    Returns:
        The learning rate, moving from ``init_lr`` to ``min_lr`` along half a
        cosine period.
    """
    # The last rollout can push num_timesteps past total_timesteps, which
    # makes the remaining fraction slightly negative. Without the clamp the
    # cosine passes its minimum and the rate starts climbing again.
    remaining = min(1.0, max(0.0, progress_remaining))
    elapsed = 1.0 - remaining
    return min_lr + (init_lr - min_lr) * 0.5 * (1.0 + math.cos(math.pi * elapsed))
1052
-
1053
- # ================================
1054
- # SAFE EVAL CALLBACK (FIXED)
1055
- # ================================
1056
class SafeMaskableEvalCallback(MaskableEvalCallback):
    """MaskableEvalCallback that syncs VecNormalize stats before evaluating.

    The training env's normalization statistics are copied to the eval env
    only on steps where an evaluation is due, rather than on every
    environment step, since the eval env is not used in between.
    """

    def _on_step(self) -> bool:
        evaluation_due = self.eval_freq > 0 and self.n_calls % self.eval_freq == 0
        if evaluation_due and self.model.get_vec_normalize_env() is not None:
            sync_envs_normalization(self.training_env, self.eval_env)
        return super()._on_step()
1061
-
1062
- # ================================
1063
- # CALLBACK (FIXED UI MATH)
1064
- # ================================
1065
class CloudArenaCallback(BaseCallback):
    """Curriculum controller and per-episode metrics logger.

    On every step it counts executed action types and, for each finished
    episode, updates an exponential moving average (EMA) of the win flag and
    appends the episode's metrics to the ``episode_*`` lists used by the
    dashboards. When enough episodes have run in the current phase and the
    EMA win rate reaches that phase's threshold, the level is incremented and
    written to the shared ``_curriculum_ref`` list.
    """

    EMA_ALPHA = 0.02          # weight of the newest episode in the win-rate EMA
    MIN_EPS_PER_PHASE = 800   # episodes required in a phase before promotion
    PHASE_THRESHOLDS = {0: 0.65, 1: 0.62, 2: 0.58, 3: 0.55, 4: 0.52}  # EMA win rate needed per phase
    PROGRESS_EVERY = 500      # timesteps between progress-line refreshes

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.ema_win_rate = 0.0
        self.current_level = 0
        self._phase_eps = 0
        self._last_progress_step = 0
        # Added for dashboard support
        self.episode_rewards = []
        self.episode_wins = []
        self.episode_savings = []
        self.episode_security = []
        self.episode_veto_rates = []
        self.curriculum_log = [(0, 0)]
        self.action_freq = np.zeros(N_ACTION_TYPES)

    def _on_step(self) -> bool:
        """Refresh progress, count actions and handle finished episodes."""
        # Threshold instead of modulo: num_timesteps advances by the number
        # of envs per call, so it may never land exactly on a multiple.
        if self.num_timesteps - self._last_progress_step >= self.PROGRESS_EVERY:
            self._last_progress_step = self.num_timesteps
            self._print_progress()

        # Log actions
        actions = self.locals.get("actions")
        if actions is not None:
            for a in np.ravel(actions):
                atype = int(a) // MAX_RESOURCES
                if 0 <= atype < N_ACTION_TYPES:
                    self.action_freq[atype] += 1

        # Inspect every sub-environment, not only index 0.
        dones = self.locals.get("dones", [False])
        infos = self.locals.get("infos", [{}])
        for env_idx, done in enumerate(dones):
            if done:
                info = infos[env_idx] if env_idx < len(infos) else {}
                self._on_episode_end(info, env_idx)
        return True

    def _on_episode_end(self, info: dict, env_idx: int = 0):
        """Record one finished episode and promote the curriculum if earned.

        Args:
            info: Info dict of the terminal step; a nested ``"final_info"``
                entry, when present, is used for the metric lookups.
            env_idx: Index of the sub-environment that finished; only used
                for the last-step reward fallback.
        """
        outer = info
        if "final_info" in info:
            info = info["final_info"]
        win = int(info.get("win", 0))
        self.ema_win_rate = (1 - self.EMA_ALPHA) * self.ema_win_rate + self.EMA_ALPHA * win

        # Logging for Dashboard
        self.episode_rewards.append(self._episode_return(outer, info, env_idx))
        self.episode_wins.append(win)
        self.episode_savings.append(info.get("savings_pct", 0))
        self.episode_security.append(info.get("security_score", 0))
        self.episode_veto_rates.append(info.get("veto_rate", 0))

        self._phase_eps += 1
        thr = self.PHASE_THRESHOLDS.get(self.current_level, 0.50)
        can_promote = self.current_level < len(self.PHASE_THRESHOLDS)
        if can_promote and self._phase_eps >= self.MIN_EPS_PER_PHASE and self.ema_win_rate >= thr:
            self._try_promote()

    def _episode_return(self, outer: dict, info: dict, env_idx: int) -> float:
        """Return the episode's total reward for the dashboards.

        Prefers an ``info["episode"]["r"]`` entry (the convention of SB3's
        Monitor wrapper). If that is absent, falls back to the reward of the
        final step from ``self.locals``, which covers only that one step.
        """
        for candidate in (outer, info):
            stats = candidate.get("episode")
            if stats and "r" in stats:
                return float(stats["r"])
        rewards = self.locals.get("rewards", [0])
        return float(rewards[env_idx]) if env_idx < len(rewards) else 0.0

    def _try_promote(self):
        """Advance one curriculum level and reset the per-phase statistics."""
        self.current_level += 1
        _curriculum_ref[0] = self.current_level
        self._phase_eps = 0
        self.ema_win_rate = 0.0
        self.curriculum_log.append((self.num_timesteps, self.current_level))
        print(f"\n✄ PROMOTED -> Phase {self.current_level}")

    def _print_progress(self):
        """Rewrite the single-line progress indicator on stdout."""
        pct = min(100.0, self.num_timesteps / TOTAL_TIMESTEPS * 100)
        sys.stdout.write(f"\rProgress: {pct:.1f}% | Phase: {self.current_level} | EMA Win: {self.ema_win_rate*100:.1f}%")
        sys.stdout.flush()
1131
-
1132
- # ================================
1133
- # ENV SETUP
1134
- # ================================
1135
def make_env():
    """Create one CloudArenaEnv wrapped in Monitor and then ActionMasker.

    The env is constructed with the module-level ``_curriculum_ref`` and
    ``_global_step_ref`` lists.
    """
    monitored = Monitor(CloudArenaEnv(_curriculum_ref, _global_step_ref))
    return ActionMasker(monitored, get_action_masks)
1139
-
1140
- train_env = DummyVecEnv([make_env])
1141
- train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.0)
1142
-
1143
- eval_env = DummyVecEnv([make_env])
1144
- eval_env = VecNormalize(eval_env, norm_obs=True, norm_reward=False, training=False)
1145
- eval_env.obs_rms = train_env.obs_rms
1146
-
1147
- # ================================
1148
- # MODEL & CALLBACK INIT
1149
- # ================================
1150
- model = MaskablePPO("MlpPolicy", train_env, learning_rate=cosine_lr, ent_coef=0.01, verbose=0)
1151
- arena_cb = CloudArenaCallback()
1152
- eval_cb = SafeMaskableEvalCallback(eval_env, best_model_save_path="./models/", eval_freq=10000)
1153
-
1154
- # ================================
1155
- # TRAIN
1156
- # ================================
1157
- print("Starting Pipeline...")
1158
- model.learn(total_timesteps=TOTAL_TIMESTEPS, callback=[arena_cb, eval_cb])
1159
-
1160
- # SAVE FINAL ARTIFACTS
1161
- model.save("./models/cloud_arena_final")
1162
- train_env.save("./models/cloud_arena_vecnorm.pkl")
1163
- print("\n✅ Model and VecNormalize stats saved.")
1164
-
1165
- # ══════════════════════════════════════════════════════════════════════════════
1166
- # CELL 4 — EVALUATION (30 deterministic episodes)
1167
- # ══════════════════════════════════════════════════════════════════════════════
1168
-
1169
- import numpy as np
1170
- from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
1171
- from sb3_contrib import MaskablePPO
1172
-
1173
- print("\n" + "=" * 60)
1174
- print(" EVALUATION — 30 Deterministic Episodes")
1175
- print("=" * 60)
1176
-
1177
- eval_level = arena_cb.current_level
1178
-
1179
def make_eval_env2(level: int, seed: int = 9999):
    """Return a factory that builds a seeded, action-masked evaluation env.

    Args:
        level: Curriculum level passed to the env as ``curriculum_ref``.
        seed: Seed handed to the env's ``reset`` right after construction.

    Returns:
        A zero-argument callable suitable for ``DummyVecEnv``.
    """
    def _init():
        env = CloudArenaEnv(
            curriculum_ref=[level],
            global_step_ref=[TOTAL_TIMESTEPS])
        # The seed argument was previously ignored. Seed once here; the
        # vectorized wrapper resets again before the first episode.
        # NOTE(review): this only makes episodes reproducible if the env
        # draws its randomness from the RNG seeded by reset(seed=...) — confirm.
        env.reset(seed=seed)
        return ActionMasker(env, get_action_masks)
    return _init
1186
-
1187
- # Ensure artifacts exist before loading
1188
- if not os.path.exists("./models/cloud_arena_vecnorm.pkl"):
1189
- print("❌ Error: vecnorm.pkl not found. Did you finish training in Cell 3?")
1190
- else:
1191
- raw_eval2 = DummyVecEnv([make_eval_env2(eval_level)])
1192
- eval_env2 = VecNormalize.load("./models/cloud_arena_vecnorm.pkl", raw_eval2)
1193
- eval_env2.training = False
1194
- eval_env2.norm_reward = False
1195
-
1196
- eval_model = MaskablePPO.load("./models/cloud_arena_final", env=eval_env2)
1197
-
1198
- def _get_inner(vec_env):
1199
- inner = vec_env.envs[0]
1200
- while hasattr(inner, "env"):
1201
- inner = inner.env
1202
- return inner
1203
-
1204
- N_EVAL = 30
1205
- results = {k: [] for k in ["win","cost_score","security_score","reliability_score",
1206
- "savings_pct","veto_rate","cascade_count","steps"]}
1207
-
1208
- for ep in range(N_EVAL):
1209
- obs = eval_env2.reset()
1210
- done = False; steps = 0
1211
- while not done:
1212
- masks = [_get_inner(eval_env2).action_masks()]
1213
- act, _ = eval_model.predict(obs, deterministic=True, action_masks=masks)
1214
- obs, rew, done_arr, info_arr = eval_env2.step(act)
1215
- done = bool(done_arr[0]); steps += 1
1216
- info = info_arr[0] if info_arr else {}
1217
- for k in results:
1218
- results[k].append(info.get(k, 0) if k != "steps" else steps)
1219
-
1220
- print(f" Ep {ep+1:>2}: {'WIN ' if info.get('win') else 'LOSS'} | "
1221
- f"reward N/A | "
1222
- f"cost={info.get('cost_score',0):.2f} | "
1223
- f"sec={info.get('security_score',0):.2f} | "
1224
- f"sav={info.get('savings_pct',0):.1f}% | "
1225
- f"steps={steps}")
1226
-
1227
- wr = np.mean(results["win"]) * 100
1228
- cost_sc = np.mean(results["cost_score"])
1229
- sec_sc = np.mean(results["security_score"])
1230
- rel_sc = np.mean(results["reliability_score"])
1231
- savings = np.mean(results["savings_pct"])
1232
- veto_r = np.mean(results["veto_rate"]) * 100
1233
- casc_r = np.mean([c > 0 for c in results["cascade_count"]]) * 100
1234
- avg_s = np.mean(results["steps"])
1235
-
1236
- composite = (wr + cost_sc * 100 + sec_sc * 100 + rel_sc * 100) / 4
1237
- tier = ("🥇 PLATINUM" if composite >= 90 else
1238
- "🥈 GOLD" if composite >= 75 else
1239
- "🥉 SILVER" if composite >= 60 else
1240
- "🔶 BRONZE" if composite >= 45 else
1241
- "❌ FAILING")
1242
-
1243
- print(f"\n{'='*40}")
1244
- print(f" EVALUATION RESULTS ({N_EVAL} eps | Phase={eval_level})")
1245
- print(f"{'='*40}")
1246
- print(f" Win Rate : {wr:.1f}%")
1247
- print(f" Cost Score : {cost_sc:.3f}")
1248
- print(f" Security Score : {sec_sc:.3f}")
1249
- print(f" Reliability : {rel_sc:.3f}")
1250
- print(f" Avg Savings : {savings:.1f}%")
1251
- print(f" Veto Rate : {veto_r:.1f}%")
1252
- print(f" Cascade Rate : {casc_r:.1f}%")
1253
- print(f" Avg Steps : {avg_s:.1f}")
1254
- print(f" Composite : {composite:.1f}%")
1255
- print(f"{'='*40}")
1256
- print(f" TIER: {tier}")
1257
- print(f"{'='*40}")
1258
-
1259
- # ══════════════════════════════════════════════════════════════════════════════
1260
- # CELL 5 — BOSS FIGHTS
1261
- # ══════════════════════════════════════════════════════════════════════════════
1262
-
1263
- import os, torch
1264
- import numpy as np
1265
- from typing import List
1266
- from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
1267
- from sb3_contrib import MaskablePPO
1268
-
1269
- print("\n" + "=" * 60)
1270
- print(" BOSS FIGHTS")
1271
- print("=" * 60)
1272
-
1273
- BOSS_NAMES = {
1274
- 1: "Cost Crisis",
1275
- 2: "Security Breach",
1276
- 3: "Infrastructure Failure (NOOP Test)",
1277
- 4: "Traffic Surge",
1278
- 5: "Final Boss (Multi-Failure)",
1279
- }
1280
-
1281
- def _get_inner(vec_env):
1282
- inner = vec_env.envs[0]
1283
- while hasattr(inner, "env"):
1284
- inner = inner.env
1285
- return inner
1286
-
1287
def run_boss_fight(model, level: int, scenario_id: int, n_runs: int = 10) -> List[dict]:
    """Play ``n_runs`` deterministic episodes of one boss scenario.

    Each run builds a fresh env, wraps it with the saved VecNormalize
    statistics (frozen, rewards not normalized), resets the innermost env
    with seed ``100 + run_index`` and the requested scenario, and steps the
    model with action masks until the episode ends.

    Args:
        model: Policy exposing ``predict(obs, deterministic, action_masks)``.
        level: Curriculum level passed to the env.
        scenario_id: Scenario passed through ``reset(options=...)``.
        n_runs: Number of episodes to play.

    Returns:
        One info dict per run (the final step's info) extended with
        ``steps``, ``noops_chaos`` and ``chaos_steps``.

    Raises:
        FileNotFoundError: If the saved VecNormalize statistics are missing.
    """
    vecnorm_path = "./models/cloud_arena_vecnorm.pkl"
    # Check once, before any environment is created, so that a missing
    # artifact does not leave a half-built env behind.
    if not os.path.exists(vecnorm_path):
        raise FileNotFoundError("Missing vecnorm.pkl artifacts.")

    def _init():
        env = CloudArenaEnv(curriculum_ref=[level], global_step_ref=[0])
        return ActionMasker(env, get_action_masks)

    results = []
    for seed in range(100, 100 + n_runs):
        vec = VecNormalize.load(vecnorm_path, DummyVecEnv([_init]))
        try:
            vec.training = False
            vec.norm_reward = False

            inner = _get_inner(vec)
            # Reset the specific scenario directly on the innermost env, then
            # normalize the raw observation by hand because the reset did not
            # go through the VecNormalize wrapper.
            raw_obs, _ = inner.reset(seed=seed, options={"scenario": scenario_id})
            obs = vec.normalize_obs(np.array([raw_obs]))

            done = False
            steps = 0
            noops_chaos = 0
            chaos_steps_total = 0

            while not done:
                masks = [inner.action_masks()]
                act, _ = model.predict(obs, deterministic=True, action_masks=masks)
                a_type = int(act[0]) // MAX_RESOURCES

                # Count how often the agent idles while chaos is active.
                if inner.chaos_active:
                    chaos_steps_total += 1
                    if a_type == A_NOOP:
                        noops_chaos += 1

                obs, _, done_arr, info_arr = vec.step(act)
                done = bool(done_arr[0])
                steps += 1

            info = info_arr[0] if info_arr else {}
            info.update({"steps": steps, "noops_chaos": noops_chaos, "chaos_steps": chaos_steps_total})
            results.append(info)
        finally:
            # Close the env even when a run raises part-way through.
            vec.close()
    return results
1326
-
1327
boss_scores = {}
overall_boss = 0.0
# Load the saved model and pick up the curriculum level reached in training.
try:
    eval_model = MaskablePPO.load("./models/cloud_arena_final")
    eval_level = _curriculum_ref[0]
except Exception as ex:
    # Best effort: an eval_model left over from an earlier cell may still be
    # usable, so report the failure (with its cause) instead of aborting.
    print("⚠️ Could not auto-load model. Ensure Cell 3 finished.")
    print(f" Reason: {ex}")

for s_id, name in BOSS_NAMES.items():
    print(f"\n▶ Boss Fight {s_id}: {name}")
    try:
        runs = run_boss_fight(eval_model, eval_level, s_id, n_runs=10)
        wins = [r.get("win", 0) for r in runs]
        costs = [r.get("cost_score", 0) for r in runs]
        secs = [r.get("security_score", 0) for r in runs]
        rels = [r.get("reliability_score", 0) for r in runs]
        saves = [r.get("savings_pct", 0) for r in runs]
        casc = [r.get("cascade_count", 0) for r in runs]
        steps = [r.get("steps", MAX_STEPS) for r in runs]

        # Scoring Logic
        if s_id == 3: # NOOP Test
            # Fraction of chaos steps on which the agent chose NOOP.
            noop_r = [r["noops_chaos"] / max(r["chaos_steps"], 1) for r in runs]
            score = (0.4*np.mean(noop_r) + 0.6*np.mean(rels)) * 100
        else:
            score = (0.4*np.mean(wins) + 0.3*np.mean(costs) + 0.3*np.mean(secs)) * 100

        boss_scores[s_id] = score
        print(f" Score: {score:.1f}% | Win Rate: {np.mean(wins)*100:.0f}%")
    except Exception as ex:
        print(f" ⚠️ Error: {ex}")

if boss_scores:
    overall_boss = float(np.mean(list(boss_scores.values())))
    # Same tier ladder as the Cell 4 evaluation, so a low score is no longer
    # reported as SILVER.
    otier = ("🥇 PLATINUM" if overall_boss >= 90 else
             "🥈 GOLD" if overall_boss >= 75 else
             "🥉 SILVER" if overall_boss >= 60 else
             "🔶 BRONZE" if overall_boss >= 45 else
             "❌ FAILING")
    print(f"\nFINAL BOSS SCORE: {overall_boss:.1f}% -> {otier}")
1364
-
1365
- # ══════════════════════════════════════════════════════════════════════════════
1366
- # CELL 6 — TRAINING DASHBOARD (9 panels)
1367
- # ══════════════════════════════════════════════════════════════════════════════
1368
-
1369
- import matplotlib
1370
- matplotlib.use("Agg")
1371
- import matplotlib.pyplot as plt
1372
- import matplotlib.gridspec as gridspec
1373
- import numpy as np
1374
- import warnings
1375
- warnings.filterwarnings("ignore")
1376
-
1377
- print("\n📊 Generating training dashboard...")
1378
-
1379
- # Standardized Colors
1380
- BG, PANEL, GRID, TEXT = "#000000", "#0a0f1a", "#1a2030", "#e0e8ff"
1381
- C_REWARD, C_WIN, C_SAVINGS, C_SEC, C_PHASE = "#1e5f8c", "#2ecc71", "#f39c12", "#00cca3", "#7a5fff"
1382
-
1383
- # Data Prep
1384
- rw = np.array(arena_cb.episode_rewards)
1385
- wn = np.array(arena_cb.episode_wins) * 100
1386
- sv = np.array(arena_cb.episode_savings)
1387
- sec = np.array(arena_cb.episode_security)
1388
- ep = np.arange(len(rw))
1389
-
1390
- fig = plt.figure(figsize=(20, 14), facecolor=BG)
1391
- gs = gridspec.GridSpec(3, 3, figure=fig, hspace=0.4, wspace=0.3)
1392
-
1393
- # Helper for smoothing
1394
def smooth(y, box_pts=50):
    """Moving average of ``y`` over a window of ``box_pts`` samples.

    Returns ``y`` itself when it is shorter than the window. Otherwise the
    result covers only fully overlapping windows, so it has
    ``len(y) - box_pts + 1`` points.
    """
    if box_pts > len(y):
        return y
    kernel = np.full(box_pts, 1.0 / box_pts)
    return np.convolve(y, kernel, mode='valid')
1398
-
1399
- # --- Panel 1: Rewards ---
1400
- ax1 = fig.add_subplot(gs[0, 0])
1401
- ax1.plot(rw, color=C_REWARD, alpha=0.3)
1402
- ax1.plot(smooth(rw), color="#4a90d9", lw=2)
1403
- ax1.set_title("Episode Rewards", color=TEXT)
1404
-
1405
- # --- Panel 2: Win Rate ---
1406
- ax2 = fig.add_subplot(gs[0, 1])
1407
- ax2.plot(smooth(wn), color=C_WIN, lw=2)
1408
- ax2.set_ylim(0, 105)
1409
- ax2.set_title("Rolling Win Rate (%)", color=TEXT)
1410
-
1411
- # --- Panel 5: Security ---
1412
- ax5 = fig.add_subplot(gs[1, 1])
1413
- ax5.plot(smooth(sec), color=C_SEC, lw=2)
1414
- ax5.set_ylim(0, 1)
1415
- ax5.set_title("Security Score", color=TEXT)
1416
-
1417
- # --- Panel 6: Summary Table ---
1418
- ax6 = fig.add_subplot(gs[1, 2])
1419
- ax6.axis('off')
1420
- summary_text = (
1421
- f"TRAINING SUMMARY\n"
1422
- f"------------------\n"
1423
- f"Episodes: {len(rw)}\n"
1424
- f"Final Phase: {arena_cb.current_level}\n"
1425
- f"EMA Win Rate: {arena_cb.ema_win_rate*100:.1f}%\n"
1426
- f"Avg Savings: {np.mean(sv):.1f}%\n"
1427
- f"Boss Score: {overall_boss:.1f}%\n"
1428
- f"FINAL TIER: {otier if 'otier' in locals() else 'N/A'}"
1429
- )
1430
- ax6.text(0.1, 0.5, summary_text, color=TEXT, fontsize=12, family='monospace', va='center')
1431
-
1432
- # --- Panel 7: Curriculum ---
1433
- ax7 = fig.add_subplot(gs[2, 0])
1434
- if hasattr(arena_cb, 'curriculum_log'):
1435
- steps, lvls = zip(*arena_cb.curriculum_log)
1436
- ax7.step(steps, lvls, where='post', color=C_PHASE, lw=2)
1437
- ax7.set_title("Curriculum Progression", color=TEXT)
1438
-
1439
- # --- Panel 8: Boss Scores ---
1440
- ax8 = fig.add_subplot(gs[2, 1])
1441
- if 'boss_scores' in locals() and boss_scores:
1442
- ax8.bar(BOSS_NAMES.values(), [boss_scores.get(i, 0) for i in BOSS_NAMES], color=C_PHASE)
1443
- ax8.set_xticklabels(BOSS_NAMES.values(), rotation=45, ha='right', fontsize=8)
1444
- ax8.set_title("Boss Fight Performance", color=TEXT)
1445
-
1446
- plt.tight_layout()
1447
- plt.savefig("training_dashboard_full.png", dpi=150, facecolor=BG)
1448
- plt.show()
1449
- print("✅ Dashboard generated: training_dashboard_full.png")
1450
-
1451
- import matplotlib.pyplot as plt
1452
- import numpy as np
1453
- from google.colab import files
1454
-
1455
- # --- Reference Image Color Palette ---
1456
- REF_BG = '#0e1117' # Dark navy/grey background
1457
- REF_CYAN = '#00d4ff' # Reward line
1458
- REF_AMBER = '#ffa500' # Savings line
1459
- REF_NEON = '#39ff14' # Security line
1460
- TEXT_COLOR = '#e6e6e6'
1461
-
1462
- rewards = np.array(arena_cb.episode_rewards)
1463
- savings = np.array(arena_cb.episode_savings)
1464
- security = np.array(arena_cb.episode_security)
1465
-
1466
def smooth_data(data, window=100):
    """Moving average of ``data`` over ``window`` samples.

    Input shorter than the window is returned unchanged. Otherwise only
    fully overlapping windows are averaged.
    """
    n_points = len(data)
    if n_points >= window:
        weights = np.ones(window) / window
        return np.convolve(data, weights, mode='valid')
    return data
1469
-
1470
- # --- Plotting ---
1471
- fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(22, 6), facecolor=REF_BG)
1472
-
1473
- for ax in [ax1, ax2, ax3]:
1474
- ax.set_facecolor(REF_BG)
1475
- ax.grid(True, alpha=0.05, color='white', linestyle='-')
1476
- ax.spines['top'].set_visible(False)
1477
- ax.spines['right'].set_visible(False)
1478
- ax.spines['left'].set_color('#333333')
1479
- ax.spines['bottom'].set_color('#333333')
1480
- ax.tick_params(colors=TEXT_COLOR, labelsize=10)
1481
-
1482
- # 1. Learning Curve (Cyan)
1483
- ax1.plot(rewards, color=REF_CYAN, alpha=0.15)
1484
- ax1.plot(smooth_data(rewards), color=REF_CYAN, lw=3)
1485
- ax1.set_title("Learning Curve (Rewards)", color=TEXT_COLOR, fontsize=14, fontweight='bold', pad=20)
1486
- ax1.set_ylabel("Total Reward", color=TEXT_COLOR, alpha=0.8)
1487
-
1488
- # 2. Cost Optimization (Amber)
1489
- ax2.plot(savings, color=REF_AMBER, alpha=0.15)
1490
- ax2.plot(smooth_data(savings), color=REF_AMBER, lw=3)
1491
- ax2.set_title("Cloud Cost Optimization %", color=TEXT_COLOR, fontsize=14, fontweight='bold', pad=20)
1492
- ax2.set_ylabel("Savings Pct", color=TEXT_COLOR, alpha=0.8)
1493
- ax2.set_ylim(0, 100)
1494
-
1495
- # 3. Security Posture (Neon Green)
1496
- ax3.plot(security, color=REF_NEON, alpha=0.15)
1497
- ax3.plot(smooth_data(security), color=REF_NEON, lw=3)
1498
- ax3.set_title("Security Posture Score", color=TEXT_COLOR, fontsize=14, fontweight='bold', pad=20)
1499
- ax3.set_ylabel("Normalized Score", color=TEXT_COLOR, alpha=0.8)
1500
- ax3.set_ylim(0, 1)
1501
-
1502
- plt.tight_layout()
1503
- filename = 'ref_styled_report.png'
1504
- plt.savefig(filename, dpi=200, bbox_inches='tight', facecolor=REF_BG)
1505
- plt.show()
1506
-
1507
- # Trigger Download
1508
- files.download(filename)
1509
-
1510
- import matplotlib.pyplot as plt
1511
- import matplotlib.gridspec as gridspec
1512
- import numpy as np
1513
- from google.colab import files
1514
-
1515
- # --- Color Palette from Reference ---
1516
- REF_BG = '#0e1117'
1517
- REF_CYAN = '#00d4ff'
1518
- REF_AMBER = '#ffa500'
1519
- REF_NEON = '#39ff14'
1520
- REF_PURPLE = '#bc13fe'
1521
- TEXT_COLOR = '#e6e6e6'
1522
-
1523
- # --- Data Extraction ---
1524
- rw = np.array(arena_cb.episode_rewards)
1525
- wn = np.array(arena_cb.episode_wins) * 100
1526
- sv = np.array(arena_cb.episode_savings)
1527
- sec = np.array(arena_cb.episode_security)
1528
- vt = np.array(arena_cb.episode_veto_rates) * 100
1529
- # Reliability as 'Crash Rate' (1 - reliability score)
1530
- cr = (1.0 - np.array(arena_cb.episode_security)) * 100
1531
- # Action frequencies
1532
- act_freq = arena_cb.action_freq
1533
- act_labels = ['NOOP','ANALYZE','VRF_DEP','RESIZE_DN','RESIZE_UP','STOP','RESTART','DELETE','PATCH','ENCRYPT','RESTRICT','ROT_CRED','EN_LOG','ARCHIVE','OPT_NET']
1534
-
1535
def smooth(y, box_pts=50):
    """Box-filter ``y`` with a window of ``box_pts`` samples.

    A series shorter than the window is returned as-is. Otherwise only
    positions where the window fits entirely inside ``y`` are kept.
    """
    too_short = len(y) < box_pts
    if too_short:
        return y
    box = np.ones(box_pts)
    box = box / box_pts
    return np.convolve(y, box, mode='valid')
1538
-
1539
- # --- Layout ---
1540
- fig = plt.figure(figsize=(24, 18), facecolor=REF_BG)
1541
- gs = gridspec.GridSpec(3, 3, figure=fig, hspace=0.4, wspace=0.3)
1542
-
1543
def style_ax(ax, title):
    """Apply the report's dark theme to ``ax`` and set its title.

    Sets the face colour, title, tick colours and a faint white grid, hides
    the top and right spines, and colours the left and bottom spines grey.
    """
    ax.set_facecolor(REF_BG)
    ax.set_title(title, color=TEXT_COLOR, fontsize=14, fontweight='bold', pad=15)
    ax.tick_params(colors=TEXT_COLOR, labelsize=10)
    ax.grid(True, alpha=0.05, color='white')
    spines = ax.spines
    spines['top'].set_visible(False)
    spines['right'].set_visible(False)
    spines['left'].set_color('#333333')
    spines['bottom'].set_color('#333333')
1550
-
1551
- # 1. Episode Reward
1552
- ax1 = fig.add_subplot(gs[0, 0]); style_ax(ax1, "Episode Reward")
1553
- ax1.plot(rw, color=REF_CYAN, alpha=0.2)
1554
- ax1.plot(smooth(rw), color=REF_CYAN, lw=2)
1555
-
1556
- # 2. Rolling Win Rate
1557
- ax2 = fig.add_subplot(gs[0, 1]); style_ax(ax2, "Rolling Win Rate (%)")
1558
- ax2.plot(smooth(wn), color=REF_NEON, lw=2)
1559
- ax2.set_ylim(0, 105)
1560
-
1561
- # 3. Rolling Crash Rate
1562
- ax3 = fig.add_subplot(gs[0, 2]); style_ax(ax3, "Rolling Crash Rate (%)")
1563
- ax3.plot(smooth(cr), color='#ff4b2b', lw=2)
1564
- ax3.set_ylim(0, 105)
1565
-
1566
- # 4. Rolling Avg Savings
1567
- ax4 = fig.add_subplot(gs[1, 0]); style_ax(ax4, "Rolling Avg Savings (%)")
1568
- ax4.plot(smooth(sv), color=REF_AMBER, lw=2)
1569
- ax4.set_ylim(0, 105)
1570
-
1571
# 5. Guard Rail Veto Rate
ax5 = fig.add_subplot(gs[1, 1]); style_ax(ax5, "Guard Rail Veto Rate (%)")
vt_smooth = smooth(vt)
ax5.plot(vt_smooth, color=REF_PURPLE, lw=2)
# np.max raises on an empty array, so fall back to 0 when nothing was logged.
vt_peak = float(np.max(vt_smooth)) if len(vt_smooth) else 0.0
ax5.set_ylim(0, max(10, vt_peak * 1.2))

# 6. Training Summary
ax6 = fig.add_subplot(gs[1, 2])
ax6.axis('off')
# wn holds 0/100 per episode, so a single entry is not a rate. Report the
# mean over the last 50 episodes (the default smoothing window) and guard
# every aggregate against an empty log.
recent_win_rate = float(np.mean(wn[-50:])) if len(wn) else 0.0
mean_savings = float(np.mean(sv)) if len(sv) else 0.0
mean_veto = float(np.mean(vt)) if len(vt) else 0.0
summary_txt = (
    f"TRAINING SUMMARY\n"
    f"------------------\n"
    f"Total Episodes: {len(rw)}\n"
    f"Current Phase: {arena_cb.current_level}\n"
    f"Final Win Rate: {recent_win_rate:.1f}%\n"
    f"Mean Savings: {mean_savings:.1f}%\n"
    f"Avg Veto Rate: {mean_veto:.2f}%\n"
    f"Status: {'SUCCESS' if recent_win_rate > 80 else 'TRAINING'}"
)
ax6.text(0.1, 0.5, summary_txt, color=TEXT_COLOR, fontsize=16, family='monospace', va='center')
1590
-
1591
- # 7. Action Distribution
1592
- ax7 = fig.add_subplot(gs[2, :]); style_ax(ax7, "Action Execution Frequency")
1593
- ax7.bar(act_labels, act_freq, color=REF_CYAN, alpha=0.8)
1594
- ax7.set_xticklabels(act_labels, rotation=30, ha='right')
1595
-
1596
- plt.tight_layout()
1597
- filename = 'comprehensive_rl_report.png'
1598
- plt.savefig(filename, dpi=150, facecolor=REF_BG)
1599
- plt.show()
1600
-
1601
- files.download(filename)
 
943
  def render(self): pass
944
 
945