# Spaces: iteratehack / deepbattler (Sleeping) | File size: 5,708 Bytes | 787c99c

#!/usr/bin/env python
# flatten_game_history.py
#
# Flatten a Hearthstone Battlegrounds game_history-style JSON file into
# a per-turn dataset that training scripts can consume directly.
#
# Input (single file; either one game object as shown below, or a JSON
# array of such game objects):
# {
#   "game_metadata": {...},
#   "turns": [
#     {
#       "turn_number": 0,
#       "phase": "PlayerTurn",
#       "state": { ... nested game_state / player_hero / resources / board_state ... },
#       "action_taken": "End Turn",
#       "battle_result": "Tie",
#       "health_before_battle": 30,
#       "health_after_battle": 30,
#       "health_change": 0,
#       "reward": 1
#     },
#     ...
#   ]
# }
#
# Output (JSON array):
# [
#   {
#     "game_id": "<from metadata or filename>",
#     "step_id": <turn_number>,
#     "turn": <state.game_state.turn_number>,
#     "phase": "PlayerTurn",
#     "state": { ... nested state ... },
#     "action_taken": "End Turn",  # full-turn description (string)
#     "battle_result": "Tie",
#     "health_before_battle": 30,
#     "health_after_battle": 30,
#     "health_change": 0,
#     "reward": 1,
#     "candidates": []  # later, attach RLAIF candidates of the form:
#                        # {
#                        #   "role": "expert" | "medium" | "bad",
#                        #   "actions": [  # sequence of atomic actions
#                        #     {
#                        #       "type": "BUY_FROM_TAVERN" | "PLAY_FROM_HAND" |
#                        #                "SELL_FROM_BOARD" | "HERO_POWER" |
#                        #                "ROLL" | "UPGRADE_TAVERN" | "FREEZE" |
#                        #                "END_TURN",
#                        #       "tavern_index": int or null,
#                        #       "hand_index": int or null,
#                        #       "board_index": int or null,
#                        #       "card_name": string or null
#                        #     },
#                        #     ...
#                        #   ],
#                        #   "reward": float
#                        # }
#   },
#   ...
# ]

import argparse
import json
from pathlib import Path
from typing import Dict, Any, List


def _flatten_single_game(game_obj: Dict[str, Any], game_id_hint: str) -> List[Dict[str, Any]]:
    """Turn one game_history object into a list of per-turn rows.

    Args:
        game_obj: Parsed game object. Its ``"game_metadata"`` and ``"turns"``
            entries are read; a missing or null value for either is treated
            as empty.
        game_id_hint: Game id to use when the metadata carries no truthy
            ``"game_id"``.

    Returns:
        One dict per entry of ``"turns"``, in input order. Each row holds the
        game id, ``step_id``/``turn`` numbers, the phase, the nested state
        object (the same object, not a copy), the outcome fields copied
        verbatim from the turn (``None`` when absent) and an empty
        ``"candidates"`` list.
    """
    meta = game_obj.get("game_metadata") or {}
    turns = game_obj.get("turns") or []

    game_id = meta.get("game_id") or game_id_hint

    rows: List[Dict[str, Any]] = []
    for t in turns:
        state = t.get("state") or {}
        gs = state.get("game_state") or {}

        # Prefer the turn-level phase, then the one inside game_state, then
        # the default. Chaining with `or` means an explicit JSON null (or an
        # empty string) falls through to the next source instead of leaking
        # into the row.
        phase = t.get("phase") or gs.get("phase") or "PlayerTurn"

        # Turn 0 is a valid value, so compare against None rather than using
        # truthiness. A key that is present but null is treated like a
        # missing key, matching how null containers are handled above.
        outer_turn = t.get("turn_number")
        turn = gs.get("turn_number")
        if turn is None:
            turn = outer_turn if outer_turn is not None else 0
        step_id = outer_turn if outer_turn is not None else turn

        row: Dict[str, Any] = {
            "game_id": game_id,
            "step_id": step_id,
            "turn": turn,
            "phase": phase,
            "state": state,
            "action_taken": t.get("action_taken"),
            "battle_result": t.get("battle_result"),
            "health_before_battle": t.get("health_before_battle"),
            "health_after_battle": t.get("health_after_battle"),
            "health_change": t.get("health_change"),
            "reward": t.get("reward"),
            # Placeholder for RLAIF annotations to be filled later.
            # The training loader will skip rows where this list is empty.
            "candidates": [],
        }
        rows.append(row)

    return rows


def flatten_game_history(input_path: str, output_path: str) -> None:
    """Flatten a game_history JSON file into a compact per-turn JSON array.

    The input may hold either a single game object (a dict with a ``"turns"``
    key) or a list of such objects. Every game is expanded with
    ``_flatten_single_game`` and the rows of all games are written, in input
    order, as one JSON array.

    Args:
        input_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write. Missing parent
            directories are created.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If the top-level JSON value is neither a dict with
            ``"turns"`` nor a list, or if a list element is not a dict with
            ``"turns"``.
    """
    in_path = Path(input_path)
    if not in_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    with in_path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    rows: List[Dict[str, Any]] = []

    if isinstance(data, dict) and "turns" in data:
        # Single game: the file stem is the fallback game id.
        rows.extend(_flatten_single_game(data, game_id_hint=in_path.stem))
    elif isinstance(data, list):
        for idx, item in enumerate(data):
            if isinstance(item, dict) and "turns" in item:
                # `or {}` guards against an explicit `"game_metadata": null`,
                # which would otherwise raise AttributeError on `.get`.
                meta = item.get("game_metadata") or {}
                # Games without an id get a positional one so that rows from
                # different games in the same file stay distinguishable.
                gid = meta.get("game_id") or f"{in_path.stem}_{idx}"
                rows.extend(_flatten_single_game(item, game_id_hint=gid))
            else:
                raise ValueError(
                    f"Unsupported JSON object at index {idx} in {input_path}: expected an object with 'turns'."
                )
    else:
        raise ValueError(
            f"Unsupported JSON structure in {input_path}: expected a dict with 'turns' or a list of such dicts."
        )

    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        # Compact JSON (no pretty indentation) to keep the file short and
        # lightweight while preserving the full nested state.
        json.dump(rows, f, ensure_ascii=False, separators=(",", ":"))

    print(f"Flattened {len(rows)} turns to {out_path}")


def main() -> None:
    """Command-line entry point: parse ``--input``/``--output`` and flatten.

    ``--input`` is mandatory. When ``--output`` is omitted (or empty) the
    result is written next to the input file as ``<input_stem>_flat.json``.
    """
    cli = argparse.ArgumentParser(description="Flatten game_history-style JSON into per-turn dataset.")
    cli.add_argument(
        "--input",
        required=True,
        help="Path to game_history-style JSON file (single game or list of games).",
    )
    output_help = (
        "Path to output JSON file (array of per-turn rows). "
        "Defaults to <input_stem>_flat.json in the same directory."
    )
    cli.add_argument("--output", required=False, help=output_help)
    opts = cli.parse_args()

    source = Path(opts.input)
    # The fallback is computed unconditionally, before looking at --output.
    fallback = source.with_name(f"{source.stem}_flat.json")
    destination = opts.output if opts.output else str(fallback)

    flatten_game_history(str(source), destination)


# Run the command-line interface only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()