Spaces:
Sleeping
Sleeping
File size: 5,708 Bytes
787c99c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
#!/usr/bin/env python
# flatten_game_history.py
#
# Flatten a Hearthstone Battlegrounds game_history-style JSON file into
# a per-turn dataset that training scripts can consume directly.
#
# Input (single file):
# {
# "game_metadata": {...},
# "turns": [
# {
# "turn_number": 0,
# "phase": "PlayerTurn",
# "state": { ... nested game_state / player_hero / resources / board_state ... },
# "action_taken": "End Turn",
# "battle_result": "Tie",
# "health_before_battle": 30,
# "health_after_battle": 30,
# "health_change": 0,
# "reward": 1
# },
# ...
# ]
# }
#
# Output (JSON array):
# [
# {
# "game_id": "<from metadata or filename>",
# "step_id": <turn_number>,
# "turn": <state.game_state.turn_number>,
# "phase": "PlayerTurn",
# "state": { ... nested state ... },
# "action_taken": "End Turn", # full-turn description (string)
# "battle_result": "Tie",
# "health_before_battle": 30,
# "health_after_battle": 30,
# "health_change": 0,
# "reward": 1,
# "candidates": [] # later, attach RLAIF candidates of the form:
# # {
# # "role": "expert" | "medium" | "bad",
# # "actions": [ # sequence of atomic actions
# # {
# # "type": "BUY_FROM_TAVERN" | "PLAY_FROM_HAND" |
# # "SELL_FROM_BOARD" | "HERO_POWER" |
# # "ROLL" | "UPGRADE_TAVERN" | "FREEZE" |
# # "END_TURN",
# # "tavern_index": int or null,
# # "hand_index": int or null,
# # "board_index": int or null,
# # "card_name": string or null
# # },
# # ...
# # ],
# # "reward": float
# # }
# },
# ...
# ]
import argparse
import json
from pathlib import Path
from typing import Dict, Any, List
def _flatten_single_game(game_obj: Dict[str, Any], game_id_hint: str) -> List[Dict[str, Any]]:
    """Convert one game object into a list of per-turn rows.

    Args:
        game_obj: Parsed game dict; its "game_metadata" and "turns" entries
            are read, and a missing or null value for either is treated as
            empty.
        game_id_hint: Identifier used when the metadata carries no truthy
            "game_id".

    Returns:
        One dict per entry of "turns", in input order. Each row holds the
        game id, step/turn numbers, phase, the nested state, the per-turn
        outcome fields, and an empty "candidates" list.
    """
    metadata = game_obj.get("game_metadata", {}) or {}
    turn_entries = game_obj.get("turns", []) or []
    resolved_id = metadata.get("game_id") or game_id_hint

    # Fields copied straight from the turn entry; absent ones become None.
    passthrough = (
        "action_taken",
        "battle_result",
        "health_before_battle",
        "health_after_battle",
        "health_change",
        "reward",
    )

    flattened: List[Dict[str, Any]] = []
    for entry in turn_entries:
        nested_state = entry.get("state", {}) or {}
        game_state = nested_state.get("game_state", {}) or {}

        # The turn-level phase wins; the nested game_state phase is the fallback.
        phase_label = entry.get("phase")
        if not phase_label:
            phase_label = game_state.get("phase", "PlayerTurn")

        # "turn" prefers the nested game_state value, "step_id" the turn-level one.
        turn_no = game_state.get("turn_number", entry.get("turn_number", 0))

        record: Dict[str, Any] = {
            "game_id": resolved_id,
            "step_id": entry.get("turn_number", turn_no),
            "turn": turn_no,
            "phase": phase_label,
            "state": nested_state,
        }
        record.update((field, entry.get(field)) for field in passthrough)
        # Placeholder for RLAIF annotations to be filled later.
        # The training loader will skip rows where this list is empty.
        record["candidates"] = []
        flattened.append(record)
    return flattened
def flatten_game_history(input_path: str, output_path: str) -> None:
    """Read a game_history-style JSON file and write its turns as one JSON array.

    The input may hold a single game (a dict containing "turns") or a list of
    such dicts. Rows from every game are concatenated in input order and
    written as compact JSON; a one-line summary is printed afterwards.

    Args:
        input_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write. Missing parent
            directories are created.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If the top-level JSON value is neither a dict with
            "turns" nor a list made up only of such dicts. Malformed JSON
            also surfaces as a ``ValueError`` (``json.JSONDecodeError``).
    """
    in_path = Path(input_path)
    if not in_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    with in_path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    rows: List[Dict[str, Any]] = []
    if isinstance(data, dict) and "turns" in data:
        rows.extend(_flatten_single_game(data, game_id_hint=in_path.stem))
    elif isinstance(data, list):
        for idx, item in enumerate(data):
            if not (isinstance(item, dict) and "turns" in item):
                raise ValueError(
                    f"Unsupported JSON object at index {idx} in {input_path}: expected an object with 'turns'."
                )
            # "game_metadata" can be present but null; fall back to an empty
            # mapping so the lookup cannot fail with AttributeError.
            meta = item.get("game_metadata") or {}
            gid = meta.get("game_id") or f"{in_path.stem}_{idx}"
            rows.extend(_flatten_single_game(item, game_id_hint=gid))
    else:
        raise ValueError(
            f"Unsupported JSON structure in {input_path}: expected a dict with 'turns' or a list of such dicts."
        )

    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        # Compact JSON (no pretty indentation) to keep the file short and
        # lightweight while preserving the full nested state.
        json.dump(rows, f, ensure_ascii=False, separators=(",", ":"))
    print(f"Flattened {len(rows)} turns to {out_path}")
def main() -> None:
    """Command-line entry point: parse arguments and flatten one input file.

    ``--input`` is required. ``--output`` is optional; when it is omitted or
    empty, the output goes to ``<input_stem>_flat.json`` in the input file's
    directory.
    """
    cli = argparse.ArgumentParser(description="Flatten game_history-style JSON into per-turn dataset.")
    cli.add_argument(
        "--input",
        required=True,
        help="Path to game_history-style JSON file (single game or list of games).",
    )
    output_help = (
        "Path to output JSON file (array of per-turn rows). "
        "Defaults to <input_stem>_flat.json in the same directory."
    )
    cli.add_argument("--output", required=False, help=output_help)
    parsed = cli.parse_args()

    source = Path(parsed.input)
    # Computed up front, before checking whether --output was supplied.
    fallback_target = source.with_name(source.stem + "_flat.json")
    if parsed.output:
        destination = parsed.output
    else:
        destination = str(fallback_target)
    flatten_game_history(str(source), destination)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|