# File size: 10,115 Bytes
#
# fba140f

from __future__ import annotations
"""

Created on Sun Aug 17 19:47:52 2025



@author: adetu

"""

# -*- coding: utf-8 -*-
"""

make_train_jsonl.py  —  builds training JSONL for AttackPlan v1.1 (and a chat sample)



Outputs (saved two folders above your workspace, under scripts/):

  - train_attackplan.jsonl   # one AttackPlan v1.1 per line

  - train_chat.jsonl         # one chat-style example (system/user -> assistant JSON)

  - train_preview.csv        # quick preview of first ~30 items



Run (from EditGlm/ as CWD):

  %run scripts/make_train_jsonl.py --n 400 --seed 7

"""

import argparse, json, os, random, sys
from pathlib import Path
from typing import Dict, Any, List, Tuple

import pandas as pd


# Put the current working directory first on the import path so that the
# project-local `libraries` package imported below can be found.
sys.path.insert(0, os.getcwd())

# Your libs
from libraries import BikdashGLM as BG
from libraries import IreNatJson as INJ

# Version string written into the "version" field of every plan skeleton.
ATTACKPLAN_VERSION = "1.1"


# Filesystem, topology, and plan-skeleton helpers

def _folders_files(Remarks: dict):
    """Call ``BG.FoldersAndFiles`` and normalise its result to a 3-tuple.

    Two result shapes are accepted: ``(Folders, Files)`` and
    ``(Folders, Files, Remarks)``.  For the 2-tuple shape the ``Remarks``
    argument that was passed in is returned as the third element.

    Returns:
        ``(Folders, Files, Remarks)``.

    Raises:
        RuntimeError: if the result is not a tuple of length 2 or 3.
    """
    result = BG.FoldersAndFiles(Remarks)

    # Guard clause: anything that is not a 2- or 3-tuple is rejected up front.
    if not isinstance(result, tuple) or len(result) not in (2, 3):
        raise RuntimeError("Unexpected return from BG.FoldersAndFiles(Remarks)")

    if len(result) == 3:
        folders, files, remarks = result
        return folders, files, remarks

    folders, files = result
    return folders, files, Remarks

def _read_initial_glm(Files: dict) -> str:
    """Return the complete text of the file at ``Files["initialGlm"]``, read as UTF-8."""
    glm_path = Path(Files["initialGlm"])
    return glm_path.read_text(encoding="utf-8")

def _load_ngj(Files: dict) -> dict:
    """Read the combined JSON file and return the NGJ object built from it.

    Steps, in order: read ``Files["combinedJson"]`` as UTF-8 text, parse it
    with ``INJ.parse_ngjson``, convert the result with ``INJ.getNGJ``, then
    call ``BG.expand_mg_info`` on it before returning it.
    """
    with open(Files["combinedJson"], "r", encoding="utf-8") as handle:
        raw_json = handle.read()

    ngj = INJ.getNGJ(INJ.parse_ngjson(Files, raw_json))
    # Called for its effect on `ngj`; its return value is not used.
    # (Original author's note: builds mg_device_lookup, etc.)
    BG.expand_mg_info(ngj)
    return ngj

def _build_topology(ELEMglm: dict, Topol: dict, NGJ: dict) -> dict:
    """Fill ``Topol`` in place and return the same dict.

    The three steps run in a fixed order, because later ones may overwrite
    keys written by earlier ones:

    1. ``BG.getTopol(ELEMglm, Topol)``
    2. store ``NGJ["mg_device_lookup"]`` (``{}`` when absent) under
       ``Topol["mg_device_lookup"]``
    3. merge the mapping returned by ``INJ.microgrid_mapping(NGJ)`` into
       ``Topol`` (original author's note: adds mg_map, etc.)
    """
    BG.getTopol(ELEMglm, Topol)

    device_lookup = NGJ.get("mg_device_lookup", {})
    Topol["mg_device_lookup"] = device_lookup

    microgrid_fields = INJ.microgrid_mapping(NGJ)
    Topol.update(microgrid_fields)
    return Topol

def _scope_map_from_topol_and_glm(Topol: dict, ELEMglm: dict) -> Dict[str, Dict[str, str | None]]:
    """Map every device name to its ``{"mg": ..., "mim": ...}`` scope.

    Original author's description of the value shape:
    device -> {'mg': 'mg1|mg2|mg3|substation|unmapped', 'mim': 'MIM1'..'MIM4'|None}

    Device names come from ``BG.extractNamesTypes`` for a fixed list of object
    types.  If that call raises, names are instead collected block by block
    with ``BG.extractNameOfGlmObject``; blocks whose extraction raises are
    skipped.  Duplicate names are dropped, keeping first-seen order.

    A device gets its ``mg``/``mim`` from ``Topol["mg_map"]`` only when the
    entry is a dict with BOTH values truthy; otherwise it is recorded as
    ``{"mg": "unmapped", "mim": None}``.
    NOTE(review): an entry that has an ``mg`` but no ``mim`` is therefore
    reported as "unmapped" — confirm this is intended.
    """
    tracked_types = ['switch', 'load', 'inverter_dyn', 'diesel_dg', 'capacitor', 'regulator']
    mapping: Dict[str, Dict[str, str]] = Topol.get("mg_map", {}) or {}

    try:
        device_names, _ = BG.extractNamesTypes(ELEMglm, tracked_types)
    except Exception:
        # Best-effort fallback: walk the raw blocks of each tracked type.
        device_names = []
        for obj_type in tracked_types:
            for glm_block in ELEMglm.get(obj_type, []):
                try:
                    found = BG.extractNameOfGlmObject(glm_block)
                except Exception:
                    continue  # unreadable block: skip it
                if found:
                    device_names.append(found)

    unmapped_marker = "unmapped"
    scope_by_device = {}
    # dict.fromkeys removes duplicates while preserving first-seen order.
    for device in dict.fromkeys(device_names):
        entry = mapping.get(device)
        fully_mapped = isinstance(entry, dict) and entry.get("mg") and entry.get("mim")
        if not fully_mapped:
            scope_by_device[device] = {"mg": unmapped_marker, "mim": None}
            continue
        scope_by_device[device] = {"mg": entry["mg"], "mim": entry["mim"]}
    return scope_by_device

def _name_from_scope(dev: str, prop: str, scope: Dict[str, Dict[str, str | None]]) -> str:
    """Compose the plan-item name for one device property.

    The name is ``"<mg><dev>.<prop>"``, prefixed with ``"<mim>."`` when the
    device's scope entry has a truthy ``mim``.  A device missing from
    ``scope``, or whose ``mg`` is falsy, uses ``"unmapped"`` as the ``mg``
    part.
    """
    entry = scope[dev] if dev in scope else {"mg": "unmapped", "mim": None}

    microgrid = entry.get("mg")
    if not microgrid:
        microgrid = "unmapped"
    device_part = f"{microgrid}{dev}.{prop}"

    mim_label = entry.get("mim")
    if not mim_label:
        return device_part
    return f"{mim_label}.{device_part}"

def _window_default() -> Dict[str, float]:
    """Return a new dict holding the default window: start 1.0, stop 20.0.

    A fresh dict is created on every call, so callers may mutate the result.
    """
    window = {}
    window["point_start_s"] = 1.0
    window["point_stop_s"] = 20.0
    return window

def _attackplan_skeleton() -> Dict[str, Any]:
    """Return a new, empty AttackPlan dict.

    The dict carries ``ATTACKPLAN_VERSION`` under ``"version"``, a 0.0–60.0
    ``"time"`` range, all four MIM labels selected and active, an empty
    ``"plan"`` list, and ``compile_hints`` with scenario id ``"a"``.
    Every call builds new containers, so the result can be mutated freely.
    """
    skeleton: Dict[str, Any] = {"version": ATTACKPLAN_VERSION}
    skeleton["time"] = {"start_s": 0.0, "end_s": 60.0}
    skeleton["mim"] = {"active": True, "selected": ["MIM1", "MIM2", "MIM3", "MIM4"]}
    skeleton["plan"] = []
    skeleton["compile_hints"] = {"scenario_id": "a"}
    return skeleton

# Value transforms (local)

def _flip_status(val: Any) -> str:
    """Return the opposite switch state, ``"OPEN"`` or ``"CLOSED"``, for *val*.

    *val* is stringified, stripped and lower-cased.  Exact tokens
    ``open/0/false/off`` flip to ``"CLOSED"`` and ``closed/1/true/on`` flip to
    ``"OPEN"``.  Any other text flips to ``"CLOSED"`` if it contains the
    substring ``"open"`` and to ``"OPEN"`` otherwise.
    NOTE(review): float-like text such as ``"0.0"`` is not an exact token and
    so falls through to the substring rule, yielding ``"OPEN"``.
    """
    token = str(val).strip().lower()

    exact_flips = {
        "open": "CLOSED", "0": "CLOSED", "false": "CLOSED", "off": "CLOSED",
        "closed": "OPEN", "1": "OPEN", "true": "OPEN", "on": "OPEN",
    }
    flipped = exact_flips.get(token)
    if flipped is not None:
        return flipped

    # Fallback for decorated values (e.g. trailing punctuation).
    if "open" in token:
        return "CLOSED"
    return "OPEN"

def _to_float(val: Any) -> float | None:
    """Convert *val* to ``float``, or return ``None`` if it is not convertible.

    Only the errors that ``float()`` raises for unconvertible input are
    treated as "not a number": ``TypeError`` (unsupported type, e.g. ``None``
    or a list), ``ValueError`` (unparsable text) and ``OverflowError`` (an
    integer too large for a float).  Any other exception propagates, so
    unrelated failures are not silently reported as missing values.
    """
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        return None

def _rand_scale(num: float, rng: random.Random, *, low: float = 0.5, high: float = 1.5) -> float:
    """Multiply *num* by a factor drawn with ``rng.uniform(low, high)``.

    Args:
        num: value to scale.
        rng: random generator to draw from; exactly one ``uniform`` draw is
            consumed per call.
        low: lower bound of the scale factor (default 0.5).
        high: upper bound of the scale factor (default 1.5).

    Returns:
        ``num`` times the drawn factor.  With the defaults this is identical
        to the previous hard-coded ``[0.5, 1.5]`` behaviour.
    """
    return num * rng.uniform(low, high)

# Main item builder (uses INJ.extract_baseline)

def _items_from_baseline(ELEMglm: dict,
                         scope_lookup: Dict[str, Dict[str, str | None]],
                         n: int | None = None,
                         seed: int = 7) -> List[Dict[str, Any]]:
    """Build AttackPlan v1.1 plan items from the baseline values of a GLM model.

    ``INJ.extract_baseline(ELEMglm)`` supplies two dicts: baseline values
    keyed by ``(device, property)`` and a device -> type lookup.  For each
    baseline entry an "attack" value is derived:

    * status-like properties (``status``, ``switchA/B/C``, case-insensitive)
      are flipped with ``_flip_status``;
    * properties starting with ``power_out_`` get one of three fixed complex
      literals, chosen with the seeded RNG;
    * everything else is parsed as a number and randomly scaled with
      ``_rand_scale``, then rounded to 3 decimals.  Values that do not parse
      are skipped.

    Args:
        ELEMglm: parsed GLM elements, passed straight to ``INJ.extract_baseline``.
        scope_lookup: device -> ``{"mg": ..., "mim": ...}`` mapping.
        n: maximum number of baseline entries to use.  When positive and
            smaller than the number of entries, a seeded random subset of
            size ``n`` is taken; otherwise all entries are used in their
            original order.
        seed: seed for the private ``random.Random`` used for sampling and
            value generation.

    Returns:
        List of plan-item dicts (name, scope, op, point, attack_value,
        real_value, phase, window).
    """
    import math  # local import: only needed for the finite-value check below

    rng = random.Random(seed)
    baseline, dev_type = INJ.extract_baseline(ELEMglm)  # returns dict, dict

    # Optional downsample
    pairs = list(baseline.items())
    if n is not None and 0 < n < len(pairs):
        rng.shuffle(pairs)
        pairs = pairs[:n]

    status_props = {"status", "switcha", "switchb", "switchc"}
    power_literals = ["60000+12000j", "40000+8000j", "30000+5000j"]

    items: List[Dict[str, Any]] = []
    for (device_name, property_name), original_val in pairs:
        # Coerce once so every branch treats the property name the same way
        # (previously only the power_out_ test tolerated non-string names).
        prop = str(property_name)

        # Decide new value
        if prop.lower() in status_props:
            new_val = _flip_status(original_val)
        elif prop.startswith("power_out_"):  # generator complex literals
            new_val = rng.choice(power_literals)
        else:
            num = _to_float(original_val)
            # Skip unparsable values.  Non-finite values are skipped as well:
            # json.dumps would write them as NaN/Infinity, which are not
            # valid JSON and would corrupt the JSONL output.
            if num is None or not math.isfinite(num):
                continue
            # Prefer not to keep 0 for inverter Pref/Qref → give a small nonzero base.
            # `or ""` keeps a missing or None device type from crashing startswith.
            device_kind = str(dev_type.get(device_name) or "")
            if device_kind.startswith("inverter") and prop in ("Pref", "Qref") and num == 0:
                num = 10000.0  # adjust if you have a domain-specific default
            new_val = round(_rand_scale(num, rng), 3)

        # Build plan item
        ent = scope_lookup.get(device_name, {"mg": "unmapped", "mim": None})
        mim = ent.get("mim")
        items.append({
            "name": _name_from_scope(device_name, property_name, scope_lookup),
            "scope": {"mg": ent.get("mg"), "mim": mim, "apply": "both" if mim else "glm_only"},
            "op": "set",                         # normalized; compiler can map to open/close/trip later
            "point": property_name,
            "attack_value": new_val,
            "real_value": original_val,
            "phase": None,
            "window": _window_default(),
        })
    return items

# Output packers

def _chat_pair_from_items(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Wrap *items* in one chat-style training example.

    A plan skeleton is filled with *items* and serialised with ``json.dumps``
    as the assistant message, after a fixed system and user message.
    NOTE(review): the user message refers to "the following actions" but
    does not itself list any — confirm this is the intended prompt format.
    """
    plan = _attackplan_skeleton()
    plan["plan"] = items

    system_msg = {"role": "system", "content": "You output ONLY JSON, no explanation."}
    user_msg = {"role": "user", "content": "Generate an AttackPlan JSON v1.1 for the following actions. Respect microgrid scope; if a device is unmapped, mark it glm_only. Return ONLY the JSON."}
    assistant_msg = {"role": "assistant", "content": json.dumps(plan, ensure_ascii=False)}

    return {"messages": [system_msg, user_msg, assistant_msg]}

def _attackplan_lines_from_items(items: List[Dict[str, Any]], pack: int = 5) -> List[Dict[str, Any]]:
    """Group *items* into AttackPlan dicts of at most *pack* items each.

    Args:
        items: plan items, in the order they should appear.
        pack: maximum number of items per plan (default 5, the previous
            hard-coded value).  The last plan may hold fewer.

    Returns:
        One plan skeleton per chunk with its ``"plan"`` list filled in.
        An empty *items* list yields an empty result.

    Raises:
        ValueError: if *pack* is less than 1.
    """
    if pack < 1:
        raise ValueError("pack must be a positive integer")

    plans = []
    # Every slice taken at a range() start index is non-empty, so no
    # emptiness check is needed before appending.
    for start in range(0, len(items), pack):
        plan = _attackplan_skeleton()
        plan["plan"] = items[start:start + pack]
        plans.append(plan)
    return plans

# Main

def main():
    """Command-line entry point: build the training items and write three files.

    Options: ``--n`` (max number of (device, property) pairs, default 400)
    and ``--seed`` (default 7).

    Files written under ``<workspace>/../../scripts``:
      * ``train_attackplan.jsonl`` — one AttackPlan per line
      * ``train_chat.jsonl``       — a single chat example from the first 25 items
      * ``train_preview.csv``      — flat preview of the first 30 items
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--n", type=int, default=400, help="Max number of (device,property) pairs to sample.")
    parser.add_argument("--seed", type=int, default=7)
    args = parser.parse_args()

    Folders, Files, _remarks = _folders_files(BG.initRemarks())

    # Output root: two folders above workspace, into scripts/
    workspace = Path(Folders["workspace"]).resolve()
    out_root = workspace.parents[1] / "scripts"
    out_root.mkdir(parents=True, exist_ok=True)

    # Load inputs (same order as before: GLM text, elements, NGJ, topology, scope)
    glm_text = _read_initial_glm(Files)
    ELEMglm, Topol = BG.getELEMs(Files, glm_text)
    ngj = _load_ngj(Files)
    Topol = _build_topology(ELEMglm, Topol, ngj)
    scope_lookup = _scope_map_from_topol_and_glm(Topol, ELEMglm)

    # Build items using IreNatJson.extract_baseline
    items = _items_from_baseline(ELEMglm, scope_lookup, n=args.n, seed=args.seed)

    # Pack outputs (slicing already clamps to the list length)
    chat_line = _chat_pair_from_items(items[:25])
    plan_lines = _attackplan_lines_from_items(items)

    attackplan_path = out_root / "train_attackplan.jsonl"
    chat_path = out_root / "train_chat.jsonl"
    preview_path = out_root / "train_preview.csv"

    with attackplan_path.open("w", encoding="utf-8") as out:
        out.writelines(json.dumps(plan, ensure_ascii=False) + "\n" for plan in plan_lines)

    with chat_path.open("w", encoding="utf-8") as out:
        out.write(json.dumps(chat_line, ensure_ascii=False) + "\n")

    # Flatten the first 30 items into one row each for a quick look in a spreadsheet.
    preview_rows = []
    for it in items[:30]:
        scope = it.get("scope") or {}
        window = it["window"]
        preview_rows.append({
            "name": it["name"],
            "mg": scope.get("mg"),
            "mim": scope.get("mim"),
            "apply": scope.get("apply"),
            "op": it["op"],
            "point": it["point"],
            "attack_value": it["attack_value"],
            "real_value": it["real_value"],
            "start": window["point_start_s"],
            "stop": window["point_stop_s"],
        })
    pd.DataFrame(preview_rows).to_csv(preview_path, index=False)

    for written in (attackplan_path, chat_path, preview_path):
        print(f"[ok] wrote {written}")

# Run the generator only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()