Spaces:
Configuration error
Configuration error
| # stratego/datasets/builder.py | |
| """ | |
| Build Hugging Face Datasets from Stratego game CSV logs. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import csv | |
| from pathlib import Path | |
| from typing import Optional, List, Dict, Any | |
| try: | |
| from datasets import Dataset | |
| HF_AVAILABLE = True | |
| except ImportError: | |
| HF_AVAILABLE = False | |
| Dataset = None | |
| class GameDatasetBuilder: | |
| """ | |
| Builds a Hugging Face Dataset from CSV game logs. | |
| """ | |
| def __init__(self, logs_dir: str = "logs/games"): | |
| if not HF_AVAILABLE: | |
| raise ImportError( | |
| "Hugging Face datasets not installed. " | |
| "Run: pip install datasets huggingface_hub" | |
| ) | |
| self.logs_dir = Path(logs_dir) | |
| self.moves: List[Dict[str, Any]] = [] | |
| def _parse_csv_file(self, csv_path: Path) -> List[Dict[str, Any]]: | |
| """Parse a single game CSV file into move records.""" | |
| moves = [] | |
| game_id = csv_path.stem | |
| try: | |
| with open(csv_path, "r", encoding="utf-8") as f: | |
| reader = csv.DictReader(f) | |
| for row in reader: | |
| move_record = { | |
| "game_id": game_id, | |
| "turn": int(row.get("turn", 0)), | |
| "player": int(row.get("player", 0)), | |
| "model_name": row.get("model_name", "unknown"), | |
| "move": row.get("move", ""), | |
| "from_pos": row.get("from_pos", ""), | |
| "to_pos": row.get("to_pos", ""), | |
| "piece_type": row.get("piece_type", ""), | |
| # New training-relevant fields | |
| "board_state": row.get("board_state", ""), | |
| "available_moves": row.get("available_moves", ""), | |
| "move_direction": row.get("move_direction", ""), | |
| "target_piece": row.get("target_piece", ""), | |
| "battle_outcome": row.get("battle_outcome", ""), | |
| "prompt_name": row.get("prompt_name", ""), | |
| "game_type": row.get("game_type", "standard"), | |
| "board_size": int(row.get("board_size", 10)) if row.get("board_size") else 10, | |
| "game_winner": row.get("game_winner", ""), | |
| "game_result": row.get("game_result", ""), | |
| } | |
| moves.append(move_record) | |
| except Exception as e: | |
| print(f"Error parsing {csv_path}: {e}") | |
| return moves | |
| def scan_logs(self) -> int: | |
| """Scan logs directory and load all CSV files.""" | |
| self.moves = [] | |
| if not self.logs_dir.exists(): | |
| return 0 | |
| csv_files = list(self.logs_dir.glob("*.csv")) | |
| for csv_path in csv_files: | |
| game_moves = self._parse_csv_file(csv_path) | |
| self.moves.extend(game_moves) | |
| return len(csv_files) | |
| def build(self) -> "Dataset": | |
| """Build a Dataset from all game logs.""" | |
| if not self.moves: | |
| self.scan_logs() | |
| if not self.moves: | |
| raise ValueError("No moves found in logs directory.") | |
| return Dataset.from_list(self.moves) | |
| def build_dataset_from_logs(logs_dir: str = "logs/games") -> "Dataset": | |
| """ | |
| Build a dataset from game logs. | |
| Args: | |
| logs_dir: Path to directory containing game CSV files | |
| Returns: | |
| Dataset with all moves | |
| """ | |
| builder = GameDatasetBuilder(logs_dir) | |
| builder.scan_logs() | |
| return builder.build() | |