#!/usr/bin/env python
# infer_battleground_cloud.py
#
# Cloud-based inference script for a fine-tuned Battlegrounds Qwen model
# hosted on Hugging Face.
#
# Backends supported:
#   1. Hugging Face Space exposing /generate_actions (preferred for this project)
#   2. Hugging Face Inference Endpoint / Hosted model via InferenceClient
#
# Usage examples:
#   PYTHONPATH=. python RL/infer_battleground_cloud.py \
#       --input RL/datasets/game_history_2_flat.json \
#       --output RL/datasets/game_history_2_actions.jsonl \
#       --model-id iteratehack/deepbattler-battleground-gamehistory
#
# or, if you deploy a dedicated Inference Endpoint:
#   PYTHONPATH=. python RL/infer_battleground_cloud.py \
#       --input RL/datasets/game_history_2_flat.json \
#       --output RL/datasets/game_history_2_actions.jsonl \
#       --endpoint https://<your-endpoint>.inference.huggingface.cloud
#
# The script expects the same "state" structure and action JSON schema as
# train_battleground_rlaif_gamehistory.py.

import argparse
import json
from pathlib import Path
from typing import Any, Dict, List, Optional

import requests
from huggingface_hub import InferenceClient

from RL.battleground_nl_utils import game_state_to_natural_language

# Prompt prefix for "json" input mode: the raw state dict is serialized and
# appended after this text.
# NOTE(review): the example format line and rules 4/6 look like they originally
# contained angle-bracket placeholders (e.g. <int|null>) that were stripped by
# text mangling — confirm against the training script before changing, since
# this string must match the training-time prompt distribution exactly.
INSTRUCTION_PREFIX = """You are a Hearthstone Battlegrounds AI. Given the current game state as a JSON object,
choose the best full-turn sequence of actions and respond with a single JSON object in this exact format:
{"actions":[{"type":"","tavern_index":,"hand_index":,"board_index":,"card_name":}, ...]}
Rules:
1. Respond with JSON only. Do not add explanations or any extra text.
2. The top-level object must have exactly one key: "actions".
3. "actions" must be a JSON array (possibly empty, but usually 1+ steps) of atomic action objects.
4. Use 0-based integers for indices or null when not used.
5. "type" must be one of: "BUY_FROM_TAVERN","PLAY_FROM_HAND","SELL_FROM_BOARD",
"HERO_POWER","ROLL","UPGRADE_TAVERN","FREEZE","END_TURN".
6. "card_name" must exactly match a card name from the game state when required, otherwise null.
Now here is the game state JSON:
"""

# Prompt prefix for "nl" input mode: a natural-language rendering of the state
# (from game_state_to_natural_language) is appended after this text.
INSTRUCTION_PREFIX_NL = """You are a Hearthstone Battlegrounds AI. Given the following natural language description
of the current game state, choose the best full-turn sequence of actions and respond with a single JSON object
in this exact format:
{"actions":[{"type":"","tavern_index":,"hand_index":,"board_index":,"card_name":}, ...]}
Rules:
1. Respond with JSON only. Do not add explanations or any extra text.
2. The top-level object must have exactly one key: "actions".
3. "actions" must be a JSON array (possibly empty, but usually 1+ steps) of atomic action objects.
4. Use 0-based integers for indices or null when not used.
5. "type" must be one of: "BUY_FROM_TAVERN","PLAY_FROM_HAND","SELL_FROM_BOARD",
"HERO_POWER","ROLL","UPGRADE_TAVERN","FREEZE","END_TURN".
6. "card_name" must exactly match a card name from the game state when required, otherwise null.
Now here is the description of the game state:
"""


def build_prompt(example: Dict[str, Any], input_mode: str = "json") -> str:
    """Build a prompt from a flattened game_history-style example.

    This mirrors _build_prompt in train_battleground_rlaif_gamehistory.py so
    that the inference distribution matches training.

    The example should have:
      - phase: string (e.g., "PlayerTurn")
      - turn: int
      - state: nested dict with keys: game_state, player_hero, resources,
        board_state

    Args:
        example: One flattened game-history row.
        input_mode: "json" to serialize the raw state dict, "nl" to render it
            as natural language. Must match the mode used during training.

    Returns:
        The full prompt string (instruction prefix + newline + state text).
    """
    state = example.get("state", {}) or {}
    if input_mode == "nl":
        # Natural-language mode: delegate state rendering to the shared helper.
        state_text = game_state_to_natural_language(state)
        prefix = INSTRUCTION_PREFIX_NL
    else:
        # JSON mode: prefer top-level phase/turn, fall back to the nested
        # game_state fields when the flat row omits them.
        gs = state.get("game_state", {}) or {}
        phase = example.get("phase", gs.get("phase", "PlayerTurn"))
        turn = example.get("turn", gs.get("turn_number", 0))
        obj = {
            "task": "battlegrounds_policy_v1",
            "phase": phase,
            "turn": turn,
            "state": state,
        }
        # Compact separators keep the prompt short; ensure_ascii=False keeps
        # any non-ASCII card names readable for the model.
        state_text = json.dumps(obj, separators=(",", ":"), ensure_ascii=False)
        prefix = INSTRUCTION_PREFIX
    return prefix + "\n" + state_text


def parse_actions_from_completion(text: str) -> Optional[List[Dict[str, Any]]]:
    """Parse a model completion into a list of atomic action dicts.

    Expected formats (same as training reward parser):
      - {"actions": [ {...}, {...}, ... ]}
      - {"action": [ {...}, {...}, ... ]}   # tolerated fallback
    A single action object (dict instead of list) is also tolerated and
    wrapped in a one-element list.

    Returns:
        The list of action dicts, or None when the completion does not
        contain a parseable object of the expected shape.
    """
    text = text.strip()
    # Trim any chatter around the JSON payload: take the span from the first
    # "{" to the last "}". Anything malformed simply fails json.loads below.
    start_idx = text.find("{")
    if start_idx == -1:
        return None
    end_idx = text.rfind("}")
    if end_idx == -1:
        return None
    json_str = text[start_idx : end_idx + 1]
    try:
        obj = json.loads(json_str)
    except Exception:
        return None
    if not isinstance(obj, dict):
        return None

    seq = None
    if "actions" in obj:
        if isinstance(obj["actions"], list):
            seq = obj["actions"]
        elif isinstance(obj["actions"], dict):
            seq = [obj["actions"]]
    elif "action" in obj:
        if isinstance(obj["action"], list):
            seq = obj["action"]
        elif isinstance(obj["action"], dict):
            seq = [obj["action"]]
    if seq is None:
        return None

    # Reject the whole sequence if any step is not a dict — a partially
    # valid sequence is not actionable downstream.
    actions: List[Dict[str, Any]] = []
    for step in seq:
        if not isinstance(step, dict):
            return None
        actions.append(step)
    return actions


def run_inference_via_client(
    client: InferenceClient,
    examples: List[Dict[str, Any]],
    input_mode: str = "json",
    max_new_tokens: int = 256,
    temperature: float = 0.2,
) -> List[Dict[str, Any]]:
    """Run inference over a list of examples and return enriched records.

    Each output row is the original example plus:
      - actions: parsed list of atomic action dicts (or None on parse failure)
      - raw_completion: raw text returned by the model

    Args:
        client: A configured huggingface_hub.InferenceClient (model repo id
            or dedicated endpoint URL).
        examples: Flattened game-history rows.
        input_mode: Prompt mode ("json" or "nl"); must match training.
        max_new_tokens: Generation length cap per request.
        temperature: Sampling temperature.
    """
    results: List[Dict[str, Any]] = []
    for ex in examples:
        prompt = build_prompt(ex, input_mode=input_mode)
        completion = client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        actions = parse_actions_from_completion(completion)
        out_row = dict(ex)  # shallow copy so the input rows stay untouched
        out_row["raw_completion"] = completion
        out_row["actions"] = actions
        results.append(out_row)
    return results


def run_inference_via_space(
    space_url: str,
    examples: List[Dict[str, Any]],
    max_new_tokens: int = 256,
    temperature: float = 0.2,
    timeout: int = 120,
    hf_token: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Call the deployed Space /generate_actions endpoint for each example.

    The Space builds its own prompt from the posted state, so no input_mode
    is sent here. Raises requests.HTTPError on any non-2xx response.

    Args:
        space_url: Base URL of the Space (trailing slash tolerated).
        examples: Flattened game-history rows.
        max_new_tokens: Generation length cap, forwarded to the Space.
        temperature: Sampling temperature, forwarded to the Space.
        timeout: Per-request timeout in seconds.
        hf_token: Optional HF access token for private Spaces.
    """
    base_url = space_url.rstrip("/")
    endpoint = f"{base_url}/generate_actions"
    headers = {"Content-Type": "application/json"}
    if hf_token:
        headers["Authorization"] = f"Bearer {hf_token}"

    results: List[Dict[str, Any]] = []
    for ex in examples:
        payload = {
            "phase": ex.get("phase"),
            "turn": ex.get("turn"),
            "state": ex.get("state", {}),
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        }
        resp = requests.post(endpoint, json=payload, headers=headers, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
        out_row = dict(ex)
        out_row["actions"] = data.get("actions")
        out_row["raw_completion"] = data.get("raw_completion")
        results.append(out_row)
    return results


def load_examples(path: str) -> List[Dict[str, Any]]:
    """Load a JSON file containing a list of flattened game-history rows.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the top-level JSON value is not a list.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(path)
    with p.open("r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, list):
        raise ValueError("Expected input JSON to be a list of examples (flat rows)")
    return data


def save_results(path: str, rows: List[Dict[str, Any]]) -> None:
    """Write rows as JSONL (one JSON object per line), creating parent dirs."""
    p = Path(path)
    p.parent.mkdir(parents=True, exist_ok=True)
    with p.open("w", encoding="utf-8") as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + "\n")


def parse_args() -> argparse.Namespace:
    """Parse CLI arguments; errors out unless a backend target is given."""
    parser = argparse.ArgumentParser(
        description="Run cloud inference for Battlegrounds Qwen model via Hugging Face.",
    )
    parser.add_argument(
        "--input",
        required=True,
        help="Path to input JSON file (list of flattened game_history rows).",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Path to output JSONL file with actions and raw completions.",
    )
    parser.add_argument(
        "--space-url",
        default=None,
        help=(
            "URL of the Hugging Face Space hosting /generate_actions (e.g. "
            "https://iteratehack-deepbattler.hf.space). If provided, the script calls "
            "that endpoint instead of the Inference API."
        ),
    )
    parser.add_argument(
        "--model-id",
        default=None,
        help=(
            "Hugging Face model repo id (e.g. iteratehack/deepbattler-battleground-gamehistory). "
            "Used only if --space-url is omitted."
        ),
    )
    parser.add_argument(
        "--endpoint",
        default=None,
        help=(
            "Full URL of a dedicated Inference Endpoint. If provided (and --space-url missing), "
            "this takes precedence over --model-id."
        ),
    )
    parser.add_argument(
        "--hf-token",
        default=None,
        help=(
            "Hugging Face access token. Needed for private Spaces/models. If omitted, use the token "
            "from `huggingface-cli login` or HF_TOKEN env var."
        ),
    )
    parser.add_argument(
        "--input-mode",
        choices=["json", "nl"],
        default="json",
        help="Match the input_mode used during training (json or nl).",
    )
    parser.add_argument("--max-new-tokens", type=int, default=256)
    parser.add_argument("--temperature", type=float, default=0.2)
    parser.add_argument(
        "--request-timeout",
        type=int,
        default=120,
        help="Timeout (seconds) for HTTP requests when using --space-url",
    )
    parser.add_argument(
        "--print-results",
        action="store_true",
        help="Print each output row (JSON) to stdout after inference.",
    )
    args = parser.parse_args()
    # At least one backend target is required.
    if not any([args.space_url, args.endpoint, args.model_id]):
        parser.error("Provide --space-url, --endpoint, or --model-id")
    return args


def main() -> None:
    """Entry point: load examples, run the chosen backend, write JSONL output.

    Backend precedence: --space-url > --endpoint > --model-id.
    """
    args = parse_args()
    examples = load_examples(args.input)

    if args.space_url:
        results = run_inference_via_space(
            args.space_url,
            examples,
            max_new_tokens=args.max_new_tokens,
            temperature=args.temperature,
            timeout=args.request_timeout,
            hf_token=args.hf_token,
        )
    else:
        # InferenceClient accepts either an endpoint URL or a model repo id
        # as its first argument.
        if args.endpoint:
            client = InferenceClient(args.endpoint, token=args.hf_token)
        else:
            client = InferenceClient(args.model_id, token=args.hf_token)
        results = run_inference_via_client(
            client,
            examples,
            input_mode=args.input_mode,
            max_new_tokens=args.max_new_tokens,
            temperature=args.temperature,
        )

    save_results(args.output, results)
    print(f"Wrote {len(results)} rows to {args.output}")
    if args.print_results:
        for row in results:
            print(json.dumps(row, ensure_ascii=False))


if __name__ == "__main__":
    main()