# ds6b-attackplan-qlora / scripts / train_attackplan.jsonl
# adetuire1's picture
# Upload folder using huggingface_hub
# fba140f verified
from __future__ import annotations
"""
Created on Sun Aug 17 19:47:52 2025
@author: adetu
"""
# -*- coding: utf-8 -*-
"""
make_train_jsonl.py — builds training JSONL for AttackPlan v1.1 (and a chat sample)
Outputs (saved two folders above your workspace, under scripts/):
- train_attackplan.jsonl # one AttackPlan v1.1 per line
- train_chat.jsonl # one chat-style example (system/user -> assistant JSON)
- train_preview.csv # quick preview of first ~30 items
Run (from EditGlm/ as CWD):
%run scripts/make_train_jsonl.py --n 400 --seed 7
"""
import argparse, json, os, random, sys
from pathlib import Path
from typing import Dict, Any, List, Tuple
import pandas as pd
sys.path.insert(0, os.getcwd())
# Your libs
from libraries import BikdashGLM as BG
from libraries import IreNatJson as INJ
ATTACKPLAN_VERSION = "1.1"
# Filesystem helpers
def _folders_files(Remarks: dict):
    """Call ``BG.FoldersAndFiles`` and normalise its result to a 3-tuple.

    ``BG.FoldersAndFiles(Remarks)`` is accepted in two shapes:

    * a 2-tuple ``(Folders, Files)`` -- the ``Remarks`` argument is passed
      through unchanged as the third element;
    * a 3-tuple ``(Folders, Files, Remarks)`` -- returned as-is.

    Returns:
        ``(Folders, Files, Remarks)``.

    Raises:
        RuntimeError: if the library returns anything else.
    """
    outcome = BG.FoldersAndFiles(Remarks)

    # Guard clause: reject every shape we do not know how to unpack.
    if not isinstance(outcome, tuple) or len(outcome) not in (2, 3):
        raise RuntimeError("Unexpected return from BG.FoldersAndFiles(Remarks)")

    if len(outcome) == 3:
        folders, files, remarks = outcome
        return folders, files, remarks

    folders, files = outcome
    return folders, files, Remarks
def _read_initial_glm(Files: dict) -> str:
    """Return the full text of the file at ``Files["initialGlm"]`` (UTF-8)."""
    glm_path = Path(Files["initialGlm"])
    return glm_path.read_text(encoding="utf-8")
def _load_ngj(Files: dict) -> dict:
    """Read ``Files["combinedJson"]`` and turn it into the NGJ structure.

    The raw UTF-8 text is handed to ``INJ.parse_ngjson``, the result is
    converted with ``INJ.getNGJ``, and ``BG.expand_mg_info`` is then invoked
    on the NGJ object before it is returned.
    """
    with open(Files["combinedJson"], "r", encoding="utf-8") as handle:
        raw_json = handle.read()

    parsed = INJ.parse_ngjson(Files, raw_json)
    ngj = INJ.getNGJ(parsed)

    # Called for its effect on ``ngj``; per the original author's note this
    # builds ``mg_device_lookup`` (read later by ``_build_topology``).
    BG.expand_mg_info(ngj)
    return ngj
def _build_topology(ELEMglm: dict, Topol: dict, NGJ: dict) -> dict:
    """Populate ``Topol`` in place and return it.

    Steps, in order:

    1. ``BG.getTopol(ELEMglm, Topol)`` is called on the topology dict.
    2. ``Topol["mg_device_lookup"]`` is set from ``NGJ`` (empty dict when the
       key is absent).
    3. ``Topol`` is updated with whatever ``INJ.microgrid_mapping(NGJ)``
       returns (the original author notes this adds ``mg_map``, among others).
    """
    BG.getTopol(ELEMglm, Topol)

    device_lookup = NGJ.get("mg_device_lookup", {})
    Topol["mg_device_lookup"] = device_lookup

    mapping = INJ.microgrid_mapping(NGJ)
    Topol.update(mapping)
    return Topol
def _scope_map_from_topol_and_glm(Topol: dict, ELEMglm: dict) -> Dict[str, Dict[str, str | None]]:
    """Map every known device name to its microgrid / MIM scope.

    Device names are collected for a fixed list of GLM object types, first
    via ``BG.extractNamesTypes`` and, if that call fails for any reason, by
    walking ``ELEMglm`` per type and asking ``BG.extractNameOfGlmObject`` for
    each block (blocks whose name cannot be extracted are ignored).

    Returns:
        ``{device: {"mg": ..., "mim": ...}}``.  A device whose ``mg_map``
        entry is a dict with truthy ``mg`` and ``mim`` gets those two values;
        every other device gets ``{"mg": "unmapped", "mim": None}``.
    """
    device_to_mg: Dict[str, Dict[str, str]] = Topol.get("mg_map", {}) or {}
    wanted_types = ['switch', 'load', 'inverter_dyn', 'diesel_dg', 'capacitor', 'regulator']

    try:
        found_names, _ = BG.extractNamesTypes(ELEMglm, wanted_types)
    except Exception:
        # Best-effort fallback: collect names block by block.
        found_names = []
        for obj_type in wanted_types:
            for glm_block in ELEMglm.get(obj_type, []):
                try:
                    candidate = BG.extractNameOfGlmObject(glm_block)
                    if candidate:
                        found_names.append(candidate)
                except Exception:
                    # Deliberately skipped: one unreadable block must not
                    # abort the whole scan.
                    continue

    result: Dict[str, Dict[str, str | None]] = {}
    # dict.fromkeys drops duplicates while keeping first-seen order.
    for device in dict.fromkeys(found_names):
        entry = device_to_mg.get(device)
        is_mapped = isinstance(entry, dict) and entry.get("mg") and entry.get("mim")
        if is_mapped:
            result[device] = {"mg": entry["mg"], "mim": entry["mim"]}
        else:
            result[device] = {"mg": "unmapped", "mim": None}
    return result
def _name_from_scope(dev: str, prop: str, scope: Dict[str, Dict[str, str | None]]) -> str:
    """Build the dotted point name for ``dev``/``prop``.

    The base name is ``"<mg><dev>.<prop>"`` where ``<mg>`` falls back to
    ``"unmapped"`` when the device has no scope entry or a falsy ``mg``.
    When the entry carries a truthy ``mim`` the result is prefixed with it:
    ``"<mim>.<mg><dev>.<prop>"``.
    """
    if dev in scope:
        entry = scope[dev]
    else:
        entry = {"mg": "unmapped", "mim": None}

    microgrid = entry.get("mg") or "unmapped"
    mim_id = entry.get("mim")
    qualified = "{}{}.{}".format(microgrid, dev, prop)

    if not mim_id:
        return qualified
    return "{}.{}".format(mim_id, qualified)
def _window_default() -> Dict[str, float]:
    """Return a fresh default window dict: start at 1.0 s, stop at 20.0 s."""
    return dict(point_start_s=1.0, point_stop_s=20.0)
def _attackplan_skeleton() -> Dict[str, Any]:
    """Return a new, empty AttackPlan envelope.

    The envelope carries the module's ``ATTACKPLAN_VERSION``, a 0-60 s time
    range, all four MIMs selected and active, an empty ``plan`` list and a
    ``compile_hints`` dict with ``scenario_id`` ``"a"``.  Keys are inserted
    in the order they appear in the serialised JSON.
    """
    skeleton: Dict[str, Any] = {"version": ATTACKPLAN_VERSION}
    skeleton["time"] = {"start_s": 0.0, "end_s": 60.0}
    skeleton["mim"] = {
        "active": True,
        "selected": ["MIM1", "MIM2", "MIM3", "MIM4"],
    }
    skeleton["plan"] = []
    skeleton["compile_hints"] = {"scenario_id": "a"}
    return skeleton
# Value transforms (local)
def _flip_status(val: Any) -> str:
    """Return the opposite switch state for *val* as ``"OPEN"`` or ``"CLOSED"``.

    *val* is stringified, stripped and lower-cased before matching:

    * ``open`` / ``0`` / ``false`` / ``off``  -> ``"CLOSED"``
    * ``closed`` / ``1`` / ``true`` / ``on``  -> ``"OPEN"``
    * any other text that parses as a number -> zero counts as open
      (``"CLOSED"``), non-zero as closed (``"OPEN"``)
    * anything else -> ``"CLOSED"`` if the text contains ``"open"``,
      otherwise ``"OPEN"``
    """
    text = str(val).strip().lower()
    if text in {"open", "0", "false", "off"}:
        return "CLOSED"
    if text in {"closed", "1", "true", "on"}:
        return "OPEN"

    # Numeric encodings other than the bare "0"/"1" (e.g. 0.0 or "0.0") used
    # to fall through to the substring test below and always came back as
    # "OPEN", i.e. a zero/open state was not flipped at all.
    try:
        numeric = float(text)
    except ValueError:
        numeric = None
    if numeric is not None:
        return "CLOSED" if numeric == 0 else "OPEN"

    # Last resort for decorated text such as "open;" or '"closed"'.
    return "CLOSED" if "open" in text else "OPEN"
def _to_float(val: Any) -> float | None:
    """Convert *val* to a finite ``float``; return ``None`` when impossible.

    ``None`` is returned when ``float(val)`` raises (unparsable text, wrong
    type, integer too large) and also when the result is NaN or +/-infinity.
    Non-finite values are rejected because they would be scaled into the
    plan's ``attack_value`` and serialised by ``json.dumps`` as ``NaN`` /
    ``Infinity``, which are not valid JSON tokens.
    """
    import math  # local import: the module header does not import math

    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None
def _rand_scale(num: float, rng: random.Random) -> float:
    """Multiply *num* by one factor drawn from ``rng.uniform(0.5, 1.5)``.

    Exactly one value is consumed from *rng* per call.
    """
    factor = rng.uniform(0.5, 1.5)
    return num * factor
# Main item builder (uses INJ.extract_baseline)
def _items_from_baseline(ELEMglm: dict,
                         scope_lookup: Dict[str, Dict[str, str | None]],
                         n: int | None = None,
                         seed: int = 7) -> List[Dict[str, Any]]:
    """Turn the baseline ``(device, property) -> value`` table into plan items.

    ``INJ.extract_baseline(ELEMglm)`` supplies the baseline mapping and a
    ``device -> type`` dict.  For each pair an attack value is chosen:

    * status-like properties (``status``, ``switchA/B/C``, case-insensitive)
      are flipped with ``_flip_status``;
    * properties starting with ``power_out_`` get one of three fixed complex
      literals, picked at random;
    * everything else is parsed with ``_to_float`` and randomly scaled,
      rounded to 3 decimals; values that do not parse are skipped.

    Args:
        ELEMglm: parsed GLM elements, passed to ``INJ.extract_baseline``.
        scope_lookup: ``device -> {"mg", "mim"}`` map used for naming/scope.
        n: when positive and smaller than the number of pairs, the pairs are
            shuffled and truncated to ``n``; otherwise all pairs are used in
            their original order.
        seed: seed for the private ``random.Random`` driving every random
            choice in this function.

    Returns:
        A list of plan-item dicts (``name``, ``scope``, ``op``, ``point``,
        ``attack_value``, ``real_value``, ``phase``, ``window``).
    """
    status_props = {"status", "switcha", "switchb", "switchc"}
    generator_outputs = ["60000+12000j", "40000+8000j", "30000+5000j"]
    no_scope = {"mg": "unmapped", "mim": None}

    rng = random.Random(seed)
    baseline, dev_type = INJ.extract_baseline(ELEMglm)  # two dicts

    candidates = list(baseline.items())
    wants_subset = n is not None and n > 0
    if wants_subset and len(candidates) > n:
        # Optional downsample: random subset of size n.
        rng.shuffle(candidates)
        candidates = candidates[:n]

    plan_items: List[Dict[str, Any]] = []
    for (device, prop), baseline_value in candidates:
        if prop.lower() in status_props:
            attack_value = _flip_status(baseline_value)
        elif str(prop).startswith("power_out_"):
            # Generator output is a complex literal, so pick a canned one.
            attack_value = rng.choice(generator_outputs)
        else:
            base = _to_float(baseline_value)
            if base is None:
                # Not a number we can scale; leave this property out.
                continue
            if dev_type.get(device, "").startswith("inverter"):
                # Scaling 0 stays 0, so give inverter Pref/Qref a non-zero base.
                if prop in ("Pref", "Qref") and base == 0:
                    base = 10000.0
            attack_value = round(_rand_scale(base, rng), 3)

        point_name = _name_from_scope(device, prop, scope_lookup)
        entry = scope_lookup.get(device, no_scope)
        mim_id = entry.get("mim")
        plan_items.append({
            "name": point_name,
            "scope": {
                "mg": entry.get("mg"),
                "mim": mim_id,
                "apply": "both" if mim_id else "glm_only",
            },
            "op": "set",  # normalized; compiler can map to open/close/trip later
            "point": prop,
            "attack_value": attack_value,
            "real_value": baseline_value,
            "phase": None,
            "window": _window_default(),
        })
    return plan_items
# Output packers
def _chat_pair_from_items(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Wrap *items* in one chat-style training example.

    The items become the ``plan`` of a fresh AttackPlan skeleton, which is
    serialised (non-ASCII kept as-is) into the assistant message.  The system
    and user messages are fixed prompts.

    Returns:
        ``{"messages": [system, user, assistant]}``.
    """
    envelope = _attackplan_skeleton()
    envelope["plan"] = items

    system_msg = {
        "role": "system",
        "content": "You output ONLY JSON, no explanation.",
    }
    # NOTE(review): the prompt mentions "the following actions" but no actions
    # are appended to it here -- confirm that is intended.
    user_msg = {
        "role": "user",
        "content": "Generate an AttackPlan JSON v1.1 for the following actions. Respect microgrid scope; if a device is unmapped, mark it glm_only. Return ONLY the JSON.",
    }
    assistant_msg = {
        "role": "assistant",
        "content": json.dumps(envelope, ensure_ascii=False),
    }
    return {"messages": [system_msg, user_msg, assistant_msg]}
def _attackplan_lines_from_items(items: List[Dict[str, Any]], pack: int = 5) -> List[Dict[str, Any]]:
    """Split *items* into consecutive AttackPlans of at most *pack* items.

    Each chunk ``items[i:i + pack]`` becomes the ``plan`` list of a fresh
    AttackPlan skeleton; the last plan may hold fewer items.  An empty
    *items* list yields an empty result.

    Args:
        items: plan items, in the order they should appear in the output.
        pack: maximum number of items per plan.  Defaults to 5, the value
            that used to be hard-coded here.

    Returns:
        One AttackPlan dict per chunk, in input order.

    Raises:
        ValueError: if *pack* is smaller than 1.
    """
    if pack < 1:
        raise ValueError(f"pack must be >= 1, got {pack}")

    lines: List[Dict[str, Any]] = []
    # Every start index is < len(items), so each slice holds at least one item.
    for start in range(0, len(items), pack):
        plan = _attackplan_skeleton()
        plan["plan"] = items[start:start + pack]
        lines.append(plan)
    return lines
# Main
def main():
    """Command-line entry point: build the plan items and write three files.

    Options: ``--n`` (max number of (device, property) pairs, default 400)
    and ``--seed`` (default 7).

    Files written to ``<two levels above the workspace>/scripts``:

    * ``train_attackplan.jsonl`` -- one AttackPlan per line;
    * ``train_chat.jsonl``       -- a single chat example built from the
      first 25 items;
    * ``train_preview.csv``      -- a flat preview of the first 30 items.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--n", type=int, default=400, help="Max number of (device,property) pairs to sample.")
    parser.add_argument("--seed", type=int, default=7)
    cli = parser.parse_args()

    remarks = BG.initRemarks()
    Folders, Files, remarks = _folders_files(remarks)

    # Output directory: the grandparent of the workspace, plus scripts/.
    workspace = Path(Folders["workspace"]).resolve()
    out_root = workspace.parents[1] / "scripts"
    out_root.mkdir(parents=True, exist_ok=True)

    # Inputs: GLM text -> elements/topology, combined JSON -> NGJ.
    glm_text = _read_initial_glm(Files)
    ELEMglm, Topol = BG.getELEMs(Files, glm_text)
    NGJ = _load_ngj(Files)
    Topol = _build_topology(ELEMglm, Topol, NGJ)
    scope_lookup = _scope_map_from_topol_and_glm(Topol, ELEMglm)

    items = _items_from_baseline(ELEMglm, scope_lookup, n=cli.n, seed=cli.seed)

    # Slicing already clamps to the list length.
    chat_line = _chat_pair_from_items(items[:25])
    plan_lines = _attackplan_lines_from_items(items)

    attackplan_path = out_root / "train_attackplan.jsonl"
    chat_path = out_root / "train_chat.jsonl"
    preview_path = out_root / "train_preview.csv"

    with attackplan_path.open("w", encoding="utf-8") as out:
        for plan in plan_lines:
            out.write(json.dumps(plan, ensure_ascii=False) + "\n")

    with chat_path.open("w", encoding="utf-8") as out:
        out.write(json.dumps(chat_line, ensure_ascii=False) + "\n")

    preview_rows = []
    for entry in items[:30]:
        scope = entry.get("scope") or {}
        preview_rows.append({
            "name": entry["name"],
            "mg": scope.get("mg"),
            "mim": scope.get("mim"),
            "apply": scope.get("apply"),
            "op": entry["op"],
            "point": entry["point"],
            "attack_value": entry["attack_value"],
            "real_value": entry["real_value"],
            "start": entry["window"]["point_start_s"],
            "stop": entry["window"]["point_stop_s"],
        })
    pd.DataFrame(preview_rows).to_csv(preview_path, index=False)

    for written in (attackplan_path, chat_path, preview_path):
        print(f"[ok] wrote {written}")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()