#!/usr/bin/env python
# flatten_game_history.py
#
# Flatten a Hearthstone Battlegrounds game_history-style JSON file into
# a per-turn dataset that training scripts can consume directly.
#
# Input (single file; either one game object or a JSON list of such objects):
# {
#   "game_metadata": {...},
#   "turns": [
#     {
#       "turn_number": 0,
#       "phase": "PlayerTurn",
#       "state": { ... nested game_state / player_hero / resources / board_state ... },
#       "action_taken": "End Turn",
#       "battle_result": "Tie",
#       "health_before_battle": 30,
#       "health_after_battle": 30,
#       "health_change": 0,
#       "reward": 1
#     },
#     ...
#   ]
# }
#
# Output (JSON array):
# [
#   {
#     "game_id": "<game_metadata.game_id, or a name derived from the input file>",
#     "step_id": <turn_number of the turn entry>,
#     "turn": <state.game_state.turn_number, falling back to turn_number>,
#     "phase": "PlayerTurn",
#     "state": { ... nested state ... },
#     "action_taken": "End Turn",   # full-turn description (string)
#     "battle_result": "Tie",
#     "health_before_battle": 30,
#     "health_after_battle": 30,
#     "health_change": 0,
#     "reward": 1,
#     "candidates": []   # later, attach RLAIF candidates of the form:
#                        # {
#                        #   "role": "expert" | "medium" | "bad",
#                        #   "actions": [   # sequence of atomic actions
#                        #     {
#                        #       "type": "BUY_FROM_TAVERN" | "PLAY_FROM_HAND" |
#                        #               "SELL_FROM_BOARD" | "HERO_POWER" |
#                        #               "ROLL" | "UPGRADE_TAVERN" | "FREEZE" |
#                        #               "END_TURN",
#                        #       "tavern_index": int or null,
#                        #       "hand_index": int or null,
#                        #       "board_index": int or null,
#                        #       "card_name": string or null
#                        #     },
#                        #     ...
#                        #   ],
#                        #   "reward": float
#                        # }
#   },
#   ...
# ]

import argparse
import json
from pathlib import Path
from typing import Dict, Any, List


def _flatten_single_game(game_obj: Dict[str, Any], game_id_hint: str) -> List[Dict[str, Any]]:
    """Turn one game object into a list of per-turn rows.

    Args:
        game_obj: Parsed game dict with optional "game_metadata" and "turns".
            Missing or null values for either key are treated as empty.
        game_id_hint: Identifier to use when the metadata carries no
            (truthy) "game_id".

    Returns:
        One row dict per entry of "turns", in input order.
    """
    meta = game_obj.get("game_metadata") or {}
    turns = game_obj.get("turns") or []
    game_id = meta.get("game_id") or game_id_hint

    rows: List[Dict[str, Any]] = []
    for t in turns:
        state = t.get("state") or {}
        gs = state.get("game_state") or {}

        # The turn-level phase wins; the nested game_state is the fallback.
        phase = t.get("phase") or gs.get("phase", "PlayerTurn")
        # The nested game_state turn number wins; the turn-level one is the fallback.
        turn = gs.get("turn_number", t.get("turn_number", 0))

        row: Dict[str, Any] = {
            "game_id": game_id,
            "step_id": t.get("turn_number", turn),
            "turn": turn,
            "phase": phase,
            "state": state,
            "action_taken": t.get("action_taken"),
            "battle_result": t.get("battle_result"),
            "health_before_battle": t.get("health_before_battle"),
            "health_after_battle": t.get("health_after_battle"),
            "health_change": t.get("health_change"),
            "reward": t.get("reward"),
            # Placeholder for RLAIF annotations to be filled later.
            # The training loader will skip rows where this list is empty.
            "candidates": [],
        }
        rows.append(row)
    return rows


def flatten_game_history(input_path: str, output_path: str) -> None:
    """Read a game-history JSON file and write the flattened per-turn rows.

    Args:
        input_path: Path to a JSON file holding one game object (a dict with
            "turns") or a list of such objects.
        output_path: Destination JSON file; parent directories are created.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If the JSON is neither a game object nor a list of them.
    """
    in_path = Path(input_path)
    if not in_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    with in_path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    rows: List[Dict[str, Any]] = []
    if isinstance(data, dict) and "turns" in data:
        rows.extend(_flatten_single_game(data, game_id_hint=in_path.stem))
    elif isinstance(data, list):
        for idx, item in enumerate(data):
            if isinstance(item, dict) and "turns" in item:
                # Pass only the positional fallback: the helper already prefers
                # game_metadata.game_id and tolerates a null game_metadata,
                # which a direct item["game_metadata"].get(...) would not.
                rows.extend(
                    _flatten_single_game(item, game_id_hint=f"{in_path.stem}_{idx}")
                )
            else:
                raise ValueError(
                    f"Unsupported JSON object at index {idx} in {input_path}: expected an object with 'turns'."
                )
    else:
        raise ValueError(
            f"Unsupported JSON structure in {input_path}: expected a dict with 'turns' or a list of such dicts."
        )

    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        # Compact JSON (no pretty indentation) to keep the file short and
        # lightweight while preserving the full nested state.
        json.dump(rows, f, ensure_ascii=False, separators=(",", ":"))

    print(f"Flattened {len(rows)} turns to {out_path}")


def main() -> None:
    """Parse command-line arguments and run the flattening."""
    parser = argparse.ArgumentParser(description="Flatten game_history-style JSON into per-turn dataset.")
    parser.add_argument(
        "--input",
        required=True,
        help="Path to game_history-style JSON file (single game or list of games).",
    )
    parser.add_argument(
        "--output",
        required=False,
        help=(
            "Path to output JSON file (array of per-turn rows). "
            "Defaults to <input_stem>_flat.json in the same directory."
        ),
    )
    args = parser.parse_args()

    in_path = Path(args.input)
    default_out = in_path.with_name(in_path.stem + "_flat.json")
    out_path = args.output or str(default_out)

    flatten_game_history(str(in_path), out_path)


if __name__ == "__main__":
    main()