from __future__ import annotations """ Created on Sun Aug 17 19:47:52 2025 @author: adetu """ # -*- coding: utf-8 -*- """ make_train_jsonl.py — builds training JSONL for AttackPlan v1.1 (and a chat sample) Outputs (saved two folders above your workspace, under scripts/): - train_attackplan.jsonl # one AttackPlan v1.1 per line - train_chat.jsonl # one chat-style example (system/user -> assistant JSON) - train_preview.csv # quick preview of first ~30 items Run (from EditGlm/ as CWD): %run scripts/make_train_jsonl.py --n 400 --seed 7 """ import argparse, json, os, random, sys from pathlib import Path from typing import Dict, Any, List, Tuple import pandas as pd sys.path.insert(0, os.getcwd()) # Your libs from libraries import BikdashGLM as BG from libraries import IreNatJson as INJ ATTACKPLAN_VERSION = "1.1" # Filesystem helpers def _folders_files(Remarks: dict): res = BG.FoldersAndFiles(Remarks) if isinstance(res, tuple): if len(res) == 2: Folders, Files = res return Folders, Files, Remarks elif len(res) == 3: Folders, Files, Remarks = res return Folders, Files, Remarks raise RuntimeError("Unexpected return from BG.FoldersAndFiles(Remarks)") def _read_initial_glm(Files: dict) -> str: with open(Files["initialGlm"], "r", encoding="utf-8") as f: return f.read() def _load_ngj(Files: dict) -> dict: """Load/parse your combined JSON into NGJ and expand mg lookup.""" json_text = Path(Files["combinedJson"]).read_text(encoding="utf-8") ng = INJ.parse_ngjson(Files, json_text) NGJ = INJ.getNGJ(ng) BG.expand_mg_info(NGJ) # builds mg_device_lookup, etc. return NGJ def _build_topology(ELEMglm: dict, Topol: dict, NGJ: dict) -> dict: BG.getTopol(ELEMglm, Topol) Topol["mg_device_lookup"] = NGJ.get("mg_device_lookup", {}) Topol.update(INJ.microgrid_mapping(NGJ)) # adds mg_map, etc. return Topol def _scope_map_from_topol_and_glm(Topol: dict, ELEMglm: dict) -> Dict[str, Dict[str, str | None]]: """device -> {'mg': 'mg1|mg2|mg3|substation|unmapped', 'mim': 'MIM1'..'MIM4'|None}""" scope = {} mg_map: Dict[str, Dict[str, str]] = Topol.get("mg_map", {}) or {} objTypes = ['switch', 'load', 'inverter_dyn', 'diesel_dg', 'capacitor', 'regulator'] try: names, _ = BG.extractNamesTypes(ELEMglm, objTypes) except Exception: names = [] for t in objTypes: for blk in ELEMglm.get(t, []): try: nm = BG.extractNameOfGlmObject(blk) if nm: names.append(nm) except Exception: pass names = list(dict.fromkeys(names)) for dev in names: ent = mg_map.get(dev) if isinstance(ent, dict) and ent.get("mg") and ent.get("mim"): scope[dev] = {"mg": ent["mg"], "mim": ent["mim"]} else: scope[dev] = {"mg": "unmapped", "mim": None} return scope def _name_from_scope(dev: str, prop: str, scope: Dict[str, Dict[str, str | None]]) -> str: """Build 'MIMx.mgDevice.property' if mapped; else 'mg?Device.property' (schema allows missing MIM).""" ent = scope.get(dev, {"mg": "unmapped", "mim": None}) mg = ent.get("mg") or "unmapped" mim = ent.get("mim") base = f"{mg}{dev}.{prop}" return f"{mim}.{base}" if mim else base def _window_default() -> Dict[str, float]: return {"point_start_s": 1.0, "point_stop_s": 20.0} def _attackplan_skeleton() -> Dict[str, Any]: return { "version": ATTACKPLAN_VERSION, "time": {"start_s": 0.0, "end_s": 60.0}, "mim": {"active": True, "selected": ["MIM1", "MIM2", "MIM3", "MIM4"]}, "plan": [], "compile_hints": {"scenario_id": "a"} } # Value transforms (local) def _flip_status(val: Any) -> str: s = str(val).strip().lower() if s in {"open", "0", "false", "off"}: return "CLOSED" if s in {"closed", "1", "true", "on"}: return "OPEN" return "OPEN" if "open" not in s else "CLOSED" def _to_float(val: Any) -> float | None: try: return float(val) except Exception: return None def _rand_scale(num: float, rng: random.Random) -> float: # scale in [0.5, 1.5] (adjust if you prefer) return num * rng.uniform(0.5, 1.5) # Main item builder (uses INJ.extract_baseline) def _items_from_baseline(ELEMglm: dict, scope_lookup: Dict[str, Dict[str, str | None]], n: int | None = None, seed: int = 7) -> List[Dict[str, Any]]: """ Use IreNatJson.extract_baseline(ELEMglm) to get {(device, prop): value} and dev types, then create AttackPlan v1.1 plan items by flipping status / tweaking numeric values. """ rng = random.Random(seed) baseline, dev_type = INJ.extract_baseline(ELEMglm) # returns dict, dict # Optional downsample pairs = list(baseline.items()) if n is not None and n > 0 and len(pairs) > n: rng.shuffle(pairs) pairs = pairs[:n] items: List[Dict[str, Any]] = [] for (device_name, property_name), original_val in pairs: # Decide new value if property_name.lower() in {"status", "switcha", "switchb", "switchc"}: new_val = _flip_status(original_val) elif str(property_name).startswith("power_out_"): # generator complex literals new_val = rng.choice(["60000+12000j", "40000+8000j", "30000+5000j"]) else: num = _to_float(original_val) if num is None: # fallback if unparsable numeric — skip this property continue # Prefer not to keep 0 for inverter Pref/Qref → give a small nonzero base if (dev_type.get(device_name, "").startswith("inverter") and property_name in ("Pref", "Qref") and num == 0): num = 10000.0 # adjust if you have a domain-specific default new_val = round(_rand_scale(num, rng), 3) # Build plan item name = _name_from_scope(device_name, property_name, scope_lookup) ent = scope_lookup.get(device_name, {"mg": "unmapped", "mim": None}) scope = {"mg": ent.get("mg"), "mim": ent.get("mim"), "apply": "both" if ent.get("mim") else "glm_only"} item = { "name": name, "scope": scope, "op": "set", # normalized; compiler can map to open/close/trip later "point": property_name, "attack_value": new_val, "real_value": original_val, "phase": None, "window": _window_default(), } items.append(item) return items # Output packers def _chat_pair_from_items(items: List[Dict[str, Any]]) -> Dict[str, Any]: plan = _attackplan_skeleton() plan["plan"] = items return { "messages": [ {"role": "system", "content": "You output ONLY JSON, no explanation."}, {"role": "user", "content": "Generate an AttackPlan JSON v1.1 for the following actions. Respect microgrid scope; if a device is unmapped, mark it glm_only. Return ONLY the JSON."}, {"role": "assistant", "content": json.dumps(plan, ensure_ascii=False)} ] } def _attackplan_lines_from_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]: lines = [] pack = 5 # 3–6 items per plan; adjust as you like for i in range(0, len(items), pack): plan = _attackplan_skeleton() plan["plan"] = items[i:i+pack] if plan["plan"]: lines.append(plan) return lines # Main def main(): ap = argparse.ArgumentParser() ap.add_argument("--n", type=int, default=400, help="Max number of (device,property) pairs to sample.") ap.add_argument("--seed", type=int, default=7) args = ap.parse_args() Remarks = BG.initRemarks() Folders, Files, Remarks = _folders_files(Remarks) # Output root: two folders above workspace, into scripts/ ws = Path(Folders["workspace"]).resolve() out_root = ws.parents[1] / "scripts" out_root.mkdir(parents=True, exist_ok=True) # Load inputs initialGlm = _read_initial_glm(Files) ELEMglm, Topol = BG.getELEMs(Files, initialGlm) NGJ = _load_ngj(Files) Topol = _build_topology(ELEMglm, Topol, NGJ) scope_lookup = _scope_map_from_topol_and_glm(Topol, ELEMglm) # Build items using IreNatJson.extract_baseline items = _items_from_baseline(ELEMglm, scope_lookup, n=args.n, seed=args.seed) # Pack outputs chat_line = _chat_pair_from_items(items[:min(25, len(items))]) plan_lines = _attackplan_lines_from_items(items) # Write attackplan_path = out_root / "train_attackplan.jsonl" chat_path = out_root / "train_chat.jsonl" preview_path = out_root / "train_preview.csv" with attackplan_path.open("w", encoding="utf-8") as f: for plan in plan_lines: f.write(json.dumps(plan, ensure_ascii=False) + "\n") with chat_path.open("w", encoding="utf-8") as f: f.write(json.dumps(chat_line, ensure_ascii=False) + "\n") preview = [{ "name": it["name"], "mg": (it.get("scope") or {}).get("mg"), "mim": (it.get("scope") or {}).get("mim"), "apply": (it.get("scope") or {}).get("apply"), "op": it["op"], "point": it["point"], "attack_value": it["attack_value"], "real_value": it["real_value"], "start": it["window"]["point_start_s"], "stop": it["window"]["point_stop_s"], } for it in items[:30]] pd.DataFrame(preview).to_csv(preview_path, index=False) print(f"[ok] wrote {attackplan_path}") print(f"[ok] wrote {chat_path}") print(f"[ok] wrote {preview_path}") if __name__ == "__main__": main()