Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # flatten_game_history.py | |
| # | |
| # Flatten a Hearthstone Battlegrounds game_history-style JSON file into | |
| # a per-turn dataset that training scripts can consume directly. | |
| # | |
| # Input (a single file holding one game object as shown below, or a JSON array of such objects): | |
| # { | |
| # "game_metadata": {...}, | |
| # "turns": [ | |
| # { | |
| # "turn_number": 0, | |
| # "phase": "PlayerTurn", | |
| # "state": { ... nested game_state / player_hero / resources / board_state ... }, | |
| # "action_taken": "End Turn", | |
| # "battle_result": "Tie", | |
| # "health_before_battle": 30, | |
| # "health_after_battle": 30, | |
| # "health_change": 0, | |
| # "reward": 1 | |
| # }, | |
| # ... | |
| # ] | |
| # } | |
| # | |
| # Output (JSON array): | |
| # [ | |
| # { | |
| # "game_id": "<from metadata or filename>", | |
| # "step_id": <turn_number>, | |
| # "turn": <state.game_state.turn_number>, | |
| # "phase": "PlayerTurn", | |
| # "state": { ... nested state ... }, | |
| # "action_taken": "End Turn", # full-turn description (string) | |
| # "battle_result": "Tie", | |
| # "health_before_battle": 30, | |
| # "health_after_battle": 30, | |
| # "health_change": 0, | |
| # "reward": 1, | |
| # "candidates": [] # later, attach RLAIF candidates of the form: | |
| # # { | |
| # # "role": "expert" | "medium" | "bad", | |
| # # "actions": [ # sequence of atomic actions | |
| # # { | |
| # # "type": "BUY_FROM_TAVERN" | "PLAY_FROM_HAND" | | |
| # # "SELL_FROM_BOARD" | "HERO_POWER" | | |
| # # "ROLL" | "UPGRADE_TAVERN" | "FREEZE" | | |
| # # "END_TURN", | |
| # # "tavern_index": int or null, | |
| # # "hand_index": int or null, | |
| # # "board_index": int or null, | |
| # # "card_name": string or null | |
| # # }, | |
| # # ... | |
| # # ], | |
| # # "reward": float | |
| # # } | |
| # }, | |
| # ... | |
| # ] | |
| import argparse | |
| import json | |
| from pathlib import Path | |
| from typing import Dict, Any, List | |
| def _flatten_single_game(game_obj: Dict[str, Any], game_id_hint: str) -> List[Dict[str, Any]]: | |
| meta = game_obj.get("game_metadata", {}) or {} | |
| turns = game_obj.get("turns", []) or [] | |
| game_id = meta.get("game_id") or game_id_hint | |
| rows: List[Dict[str, Any]] = [] | |
| for t in turns: | |
| state = t.get("state", {}) or {} | |
| gs = state.get("game_state", {}) or {} | |
| phase = t.get("phase") or gs.get("phase", "PlayerTurn") | |
| turn = gs.get("turn_number", t.get("turn_number", 0)) | |
| row: Dict[str, Any] = { | |
| "game_id": game_id, | |
| "step_id": t.get("turn_number", turn), | |
| "turn": turn, | |
| "phase": phase, | |
| "state": state, | |
| "action_taken": t.get("action_taken"), | |
| "battle_result": t.get("battle_result"), | |
| "health_before_battle": t.get("health_before_battle"), | |
| "health_after_battle": t.get("health_after_battle"), | |
| "health_change": t.get("health_change"), | |
| "reward": t.get("reward"), | |
| # Placeholder for RLAIF annotations to be filled later. | |
| # The training loader will skip rows where this list is empty. | |
| "candidates": [], | |
| } | |
| rows.append(row) | |
| return rows | |
def flatten_game_history(input_path: str, output_path: str) -> None:
    """Flatten a game_history-style JSON file into a per-turn JSON array.

    The input may be a single game object (a dict with a ``turns`` key) or a
    list of such objects. Every turn of every game becomes one row in the
    output array, which is written as compact JSON.

    Args:
        input_path: Path to the JSON file to read.
        output_path: Path of the JSON file to write. Missing parent
            directories are created.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If the top-level JSON value is neither a game object nor
            a list of game objects, or if a list element is not a game object.
    """
    in_path = Path(input_path)
    if not in_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    with in_path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    rows: List[Dict[str, Any]] = []
    if isinstance(data, dict) and "turns" in data:
        rows.extend(_flatten_single_game(data, game_id_hint=in_path.stem))
    elif isinstance(data, list):
        for idx, item in enumerate(data):
            if not (isinstance(item, dict) and "turns" in item):
                raise ValueError(
                    f"Unsupported JSON object at index {idx} in {input_path}: expected an object with 'turns'."
                )
            # Pass only the positional fallback id. _flatten_single_game already
            # prefers game_metadata.game_id and tolerates a null game_metadata,
            # so reading the metadata here is unnecessary (and used to crash
            # with AttributeError when "game_metadata" was null).
            rows.extend(_flatten_single_game(item, game_id_hint=f"{in_path.stem}_{idx}"))
    else:
        raise ValueError(
            f"Unsupported JSON structure in {input_path}: expected a dict with 'turns' or a list of such dicts."
        )

    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        # Compact JSON (no pretty indentation) to keep the file short and
        # lightweight while preserving the full nested state.
        json.dump(rows, f, ensure_ascii=False, separators=(",", ":"))

    print(f"Flattened {len(rows)} turns to {out_path}")
def main() -> None:
    """Command-line entry point: parse ``--input``/``--output`` and flatten the file."""
    cli = argparse.ArgumentParser(description="Flatten game_history-style JSON into per-turn dataset.")
    cli.add_argument(
        "--input",
        required=True,
        help="Path to game_history-style JSON file (single game or list of games).",
    )
    output_help = (
        "Path to output JSON file (array of per-turn rows). "
        "Defaults to <input_stem>_flat.json in the same directory."
    )
    cli.add_argument("--output", required=False, help=output_help)
    options = cli.parse_args()

    source = Path(options.input)
    # The default output sits next to the input as <input_stem>_flat.json.
    fallback = source.with_name(source.stem + "_flat.json")
    if options.output:
        destination = options.output
    else:
        destination = str(fallback)

    flatten_game_history(str(source), destination)


# Run the CLI only when the file is executed as a script.
if __name__ == "__main__":
    main()