ds6b-attackplan-qlora / scripts /train_attackplan.jsonl

Upload folder using huggingface_hub

fba140f verified 7 months ago

10.1 kB

	from __future__ import annotations
	"""
	Created on Sun Aug 17 19:47:52 2025

	@author: adetu
	"""

	# -*- coding: utf-8 -*-
	"""
	make_train_jsonl.py — builds training JSONL for AttackPlan v1.1 (and a chat sample)

	Outputs (saved two folders above your workspace, under scripts/):
	- train_attackplan.jsonl # one AttackPlan v1.1 per line
	- train_chat.jsonl # one chat-style example (system/user -> assistant JSON)
	- train_preview.csv # quick preview of first ~30 items

	Run (from EditGlm/ as CWD):
	%run scripts/make_train_jsonl.py --n 400 --seed 7
	"""

	import argparse, json, os, random, sys
	from pathlib import Path
	from typing import Dict, Any, List, Tuple

	import pandas as pd


	# Put the current working directory first on the module search path. The
	# module docstring says to run this script with EditGlm/ as the CWD, so this
	# is presumably what lets the `libraries` package below be imported.
	sys.path.insert(0, os.getcwd())

	# Your libs
	# Must stay below the sys.path tweak above; referenced as BG / INJ throughout.
	from libraries import BikdashGLM as BG
	from libraries import IreNatJson as INJ

	# Written into the "version" field of every plan built by _attackplan_skeleton().
	ATTACKPLAN_VERSION = "1.1"


	# Filesystem helpers

	def _folders_files(Remarks: dict):
	    """Call ``BG.FoldersAndFiles`` and normalise its result to a 3-tuple.

	    The call may hand back either a ``(Folders, Files)`` pair or a
	    ``(Folders, Files, Remarks)`` triple. For a pair, the ``Remarks``
	    argument is passed through unchanged as the third element.

	    Returns:
	        ``(Folders, Files, Remarks)``.

	    Raises:
	        RuntimeError: if the result is not a tuple of length 2 or 3.
	    """
	    outcome = BG.FoldersAndFiles(Remarks)
	    # Reject anything that is not a 2- or 3-tuple up front.
	    if not (isinstance(outcome, tuple) and len(outcome) in (2, 3)):
	        raise RuntimeError("Unexpected return from BG.FoldersAndFiles(Remarks)")
	    if len(outcome) == 3:
	        Folders, Files, Remarks = outcome
	    else:
	        Folders, Files = outcome
	    return Folders, Files, Remarks

	def _read_initial_glm(Files: dict) -> str:
	with open(Files["initialGlm"], "r", encoding="utf-8") as f:
	return f.read()

	def _load_ngj(Files: dict) -> dict:
	    """Read ``Files["combinedJson"]`` and return the NGJ object built from it.

	    The UTF-8 text goes to ``INJ.parse_ngjson`` (together with ``Files``),
	    the parse result goes to ``INJ.getNGJ``, and the resulting object is
	    handed to ``BG.expand_mg_info`` before being returned.
	    """
	    raw_json = Path(Files["combinedJson"]).read_text(encoding="utf-8")
	    NGJ = INJ.getNGJ(INJ.parse_ngjson(Files, raw_json))
	    # Original author's note: this builds mg_device_lookup, etc. The return
	    # value is ignored, so NGJ is presumably updated in place.
	    BG.expand_mg_info(NGJ)
	    return NGJ

	def _build_topology(ELEMglm: dict, Topol: dict, NGJ: dict) -> dict:
	    """Fill ``Topol`` in place with topology and microgrid data, then return it.

	    Steps, in order:
	      1. ``BG.getTopol(ELEMglm, Topol)``.
	      2. Copy ``NGJ["mg_device_lookup"]`` (or ``{}`` when absent) into
	         ``Topol["mg_device_lookup"]``.
	      3. Merge the dict from ``INJ.microgrid_mapping(NGJ)`` into ``Topol``
	         (original author's note: adds mg_map, etc.).
	    """
	    BG.getTopol(ELEMglm, Topol)
	    device_lookup = NGJ.get("mg_device_lookup", {})
	    Topol["mg_device_lookup"] = device_lookup
	    # Merged last, so keys from the mapping win over anything set above.
	    mapping = INJ.microgrid_mapping(NGJ)
	    Topol.update(mapping)
	    return Topol

	def _scope_map_from_topol_and_glm(Topol: dict, ELEMglm: dict) -> Dict[str, Dict[str, str | None]]:
	    """Map every named GLM device to its microgrid / MIM assignment.

	    Device names come from ``BG.extractNamesTypes``. If that call raises,
	    names are gathered block by block with ``BG.extractNameOfGlmObject``
	    instead, and blocks that fail are silently skipped. Duplicate names are
	    dropped, keeping first-seen order.

	    Returns:
	        ``{device: {"mg": ..., "mim": ...}}``. A device whose entry in
	        ``Topol["mg_map"]`` is a dict with truthy ``"mg"`` and ``"mim"``
	        gets those two values. Every other device gets
	        ``{"mg": "unmapped", "mim": None}``.
	    """
	    assignments = Topol.get("mg_map", {}) or {}
	    obj_types = ['switch', 'load', 'inverter_dyn', 'diesel_dg', 'capacitor', 'regulator']
	    try:
	        found, _ = BG.extractNamesTypes(ELEMglm, obj_types)
	    except Exception:
	        # Best-effort fallback: walk the raw blocks of each object type and
	        # keep whatever names can be extracted.
	        found = []
	        for kind in obj_types:
	            for block in ELEMglm.get(kind, []):
	                try:
	                    label = BG.extractNameOfGlmObject(block)
	                    if label:
	                        found.append(label)
	                except Exception:
	                    continue
	    result = {}
	    # dict.fromkeys de-duplicates while preserving first-seen order.
	    for device in dict.fromkeys(found):
	        entry = assignments.get(device)
	        if not isinstance(entry, dict) or not entry.get("mg") or not entry.get("mim"):
	            result[device] = {"mg": "unmapped", "mim": None}
	        else:
	            result[device] = {"mg": entry["mg"], "mim": entry["mim"]}
	    return result

	def _name_from_scope(dev: str, prop: str, scope: Dict[str, Dict[str, str \| None]]) -> str:
	"""Build 'MIMx.mgDevice.property' if mapped; else 'mg?Device.property' (schema allows missing MIM)."""
	ent = scope.get(dev, {"mg": "unmapped", "mim": None})
	mg = ent.get("mg") or "unmapped"
	mim = ent.get("mim")
	base = f"{mg}{dev}.{prop}"
	return f"{mim}.{base}" if mim else base

	def _window_default() -> Dict[str, float]:
	return {"point_start_s": 1.0, "point_stop_s": 20.0}

	def _attackplan_skeleton() -> Dict[str, Any]:
	    """Return a new AttackPlan document with default header fields and an empty plan.

	    Keys are inserted in the order version, time, mim, plan, compile_hints,
	    which is also the order they appear in when the plan is dumped as JSON.
	    """
	    skeleton = {"version": ATTACKPLAN_VERSION}
	    skeleton["time"] = {"start_s": 0.0, "end_s": 60.0}
	    skeleton["mim"] = {"active": True, "selected": ["MIM1", "MIM2", "MIM3", "MIM4"]}
	    skeleton["plan"] = []
	    skeleton["compile_hints"] = {"scenario_id": "a"}
	    return skeleton

	# Value transforms (local)

	def _flip_status(val: Any) -> str:
	s = str(val).strip().lower()
	if s in {"open", "0", "false", "off"}: return "CLOSED"
	if s in {"closed", "1", "true", "on"}: return "OPEN"
	return "OPEN" if "open" not in s else "CLOSED"

	def _to_float(val: Any) -> float \| None:
	try:
	return float(val)
	except Exception:
	return None

	def _rand_scale(num: float, rng: random.Random) -> float:
	# scale in [0.5, 1.5] (adjust if you prefer)
	return num * rng.uniform(0.5, 1.5)

	# Main item builder (uses INJ.extract_baseline)

	def _items_from_baseline(ELEMglm: dict,
	                         scope_lookup: Dict[str, Dict[str, str | None]],
	                         n: int | None = None,
	                         seed: int = 7) -> List[Dict[str, Any]]:
	    """Turn baseline (device, property) values into AttackPlan v1.1 plan items.

	    ``INJ.extract_baseline(ELEMglm)`` supplies a ``{(device, prop): value}``
	    mapping and a device-type mapping. When ``n`` is a positive number
	    smaller than the number of pairs, the pairs are shuffled with a
	    ``random.Random(seed)`` generator and cut to the first ``n``.

	    The attack value for each pair is chosen as follows:
	      * status / switchA / switchB / switchC (case-insensitive): the
	        original value flipped by ``_flip_status``.
	      * properties starting with ``power_out_``: one of three fixed
	        complex literals, picked at random.
	      * everything else: the original value parsed as a float, scaled by
	        ``_rand_scale`` and rounded to 3 decimals. Unparsable values are
	        skipped. A zero ``Pref``/``Qref`` on a device whose type starts
	        with ``"inverter"`` is replaced by 10000.0 before scaling.

	    Returns:
	        One dict per retained pair with the keys name, scope, op, point,
	        attack_value, real_value, phase and window, in that order.
	    """
	    rng = random.Random(seed)
	    baseline, dev_type = INJ.extract_baseline(ELEMglm)  # returns dict, dict

	    # Optional downsample; pairs are only shuffled when a cut actually happens.
	    pairs = list(baseline.items())
	    wants_sample = n is not None and n > 0
	    if wants_sample and len(pairs) > n:
	        rng.shuffle(pairs)
	        del pairs[n:]

	    status_points = {"status", "switcha", "switchb", "switchc"}
	    generator_literals = ["60000+12000j", "40000+8000j", "30000+5000j"]

	    items: List[Dict[str, Any]] = []
	    for key, original_val in pairs:
	        device_name, property_name = key

	        # Decide the attack value.
	        if property_name.lower() in status_points:
	            new_val = _flip_status(original_val)
	        elif str(property_name).startswith("power_out_"):
	            # Generator complex literals.
	            new_val = rng.choice(generator_literals)
	        else:
	            num = _to_float(original_val)
	            if num is None:
	                # Not numeric: skip this property altogether.
	                continue
	            is_inverter = dev_type.get(device_name, "").startswith("inverter")
	            if is_inverter and property_name in ("Pref", "Qref") and num == 0:
	                # Scaling 0 would stay 0, so start from a nonzero base.
	                num = 10000.0  # adjust if you have a domain-specific default
	            new_val = round(_rand_scale(num, rng), 3)

	        # Assemble the plan item; key order is what ends up in the JSON.
	        ent = scope_lookup.get(device_name, {"mg": "unmapped", "mim": None})
	        apply_mode = "both" if ent.get("mim") else "glm_only"
	        items.append({
	            "name": _name_from_scope(device_name, property_name, scope_lookup),
	            "scope": {"mg": ent.get("mg"), "mim": ent.get("mim"), "apply": apply_mode},
	            "op": "set",  # normalized; compiler can map to open/close/trip later
	            "point": property_name,
	            "attack_value": new_val,
	            "real_value": original_val,
	            "phase": None,
	            "window": _window_default(),
	        })
	    return items

	# Output packers

	def _chat_pair_from_items(items: List[Dict[str, Any]]) -> Dict[str, Any]:
	    """Wrap ``items`` in one chat-style training example.

	    The items become the ``"plan"`` of a fresh AttackPlan skeleton. That
	    plan, dumped as JSON with ``ensure_ascii=False``, is the assistant
	    message; fixed system and user messages precede it.
	    """
	    plan = _attackplan_skeleton()
	    plan["plan"] = items
	    system_msg = {"role": "system", "content": "You output ONLY JSON, no explanation."}
	    user_msg = {
	        "role": "user",
	        "content": "Generate an AttackPlan JSON v1.1 for the following actions. Respect microgrid scope; if a device is unmapped, mark it glm_only. Return ONLY the JSON.",
	    }
	    assistant_msg = {"role": "assistant", "content": json.dumps(plan, ensure_ascii=False)}
	    return {"messages": [system_msg, user_msg, assistant_msg]}

	def _attackplan_lines_from_items(items: List[Dict[str, Any]], pack: int = 5) -> List[Dict[str, Any]]:
	    """Group plan items into AttackPlan documents of at most ``pack`` items.

	    Items are taken in order, ``pack`` at a time; only the last document may
	    hold fewer. Each chunk becomes the ``"plan"`` of a fresh skeleton from
	    ``_attackplan_skeleton()``.

	    Args:
	        items: Plan items to distribute.
	        pack: Maximum items per document. Defaults to 5, the value that was
	            previously hard-coded.

	    Returns:
	        A list of AttackPlan dicts; empty when ``items`` is empty.

	    Raises:
	        ValueError: if ``pack`` is smaller than 1.
	    """
	    if pack < 1:
	        raise ValueError(f"pack must be >= 1, got {pack}")
	    lines = []
	    # Every slice taken below is non-empty because start < len(items).
	    for start in range(0, len(items), pack):
	        plan = _attackplan_skeleton()
	        plan["plan"] = items[start:start + pack]
	        lines.append(plan)
	    return lines

	# Main

	def main():
	    """Build the training files and write them under ``scripts/``.

	    Command-line options: ``--n`` (max number of (device, property) pairs to
	    sample, default 400) and ``--seed`` (default 7).

	    Written to ``<grandparent of the workspace folder>/scripts``:
	      * ``train_attackplan.jsonl``: one AttackPlan per line.
	      * ``train_chat.jsonl``: a single chat example built from the first
	        25 items.
	      * ``train_preview.csv``: a flat preview of the first 30 items.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--n", type=int, default=400, help="Max number of (device,property) pairs to sample.")
	    parser.add_argument("--seed", type=int, default=7)
	    args = parser.parse_args()

	    folders, files, _remarks = _folders_files(BG.initRemarks())

	    # Output root: two folders above workspace, into scripts/
	    workspace = Path(folders["workspace"]).resolve()
	    out_root = workspace.parents[1] / "scripts"
	    out_root.mkdir(parents=True, exist_ok=True)

	    # Load inputs
	    glm_text = _read_initial_glm(files)
	    ELEMglm, Topol = BG.getELEMs(files, glm_text)
	    Topol = _build_topology(ELEMglm, Topol, _load_ngj(files))
	    scope_lookup = _scope_map_from_topol_and_glm(Topol, ELEMglm)

	    # Build items using IreNatJson.extract_baseline
	    items = _items_from_baseline(ELEMglm, scope_lookup, n=args.n, seed=args.seed)

	    # Pack outputs
	    chat_line = _chat_pair_from_items(items[:25])
	    plan_lines = _attackplan_lines_from_items(items)

	    attackplan_path = out_root / "train_attackplan.jsonl"
	    chat_path = out_root / "train_chat.jsonl"
	    preview_path = out_root / "train_preview.csv"

	    # Write one JSON document per line
	    with attackplan_path.open("w", encoding="utf-8") as out:
	        out.writelines(json.dumps(plan, ensure_ascii=False) + "\n" for plan in plan_lines)

	    with chat_path.open("w", encoding="utf-8") as out:
	        out.write(json.dumps(chat_line, ensure_ascii=False) + "\n")

	    # Flatten the first 30 items into rows; the dict key order below is the
	    # column order of the CSV.
	    preview_rows = []
	    for it in items[:30]:
	        sc = it.get("scope") or {}
	        window = it["window"]
	        preview_rows.append({
	            "name": it["name"],
	            "mg": sc.get("mg"),
	            "mim": sc.get("mim"),
	            "apply": sc.get("apply"),
	            "op": it["op"],
	            "point": it["point"],
	            "attack_value": it["attack_value"],
	            "real_value": it["real_value"],
	            "start": window["point_start_s"],
	            "stop": window["point_stop_s"],
	        })
	    pd.DataFrame(preview_rows).to_csv(preview_path, index=False)

	    for written in (attackplan_path, chat_path, preview_path):
	        print(f"[ok] wrote {written}")

	# Run the pipeline only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()