Spaces:
Configuration error
Configuration error
File size: 3,825 Bytes
# stratego/datasets/builder.py
"""
Build Hugging Face Datasets from Stratego game CSV logs.
"""
from __future__ import annotations
import os
import csv
from pathlib import Path
from typing import Optional, List, Dict, Any
try:
from datasets import Dataset
HF_AVAILABLE = True
except ImportError:
HF_AVAILABLE = False
Dataset = None
class GameDatasetBuilder:
    """
    Builds a Hugging Face Dataset from CSV game logs.

    Every ``*.csv`` file directly inside ``logs_dir`` is treated as one game;
    the file stem becomes the ``game_id`` of each move record parsed from it.
    Parsed records accumulate in ``self.moves`` and are turned into a
    ``Dataset`` by :meth:`build`.
    """

    def __init__(self, logs_dir: str = "logs/games"):
        """
        Create a builder for the given logs directory.

        Args:
            logs_dir: Path to the directory containing game CSV files.

        Raises:
            ImportError: If the ``datasets`` package could not be imported.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "Hugging Face datasets not installed. "
                "Run: pip install datasets huggingface_hub"
            )
        self.logs_dir = Path(logs_dir)
        self.moves: List[Dict[str, Any]] = []

    @staticmethod
    def _to_int(value: Any, default: int) -> int:
        """Convert a CSV cell to int, returning ``default`` if it is empty, missing or malformed."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _text(row: Dict[str, Any], key: str, default: str = "") -> str:
        """Return ``row[key]``, or ``default`` when the column or the cell is absent."""
        # csv.DictReader fills cells missing from a short row with None, so a
        # plain ``row.get(key, default)`` would not apply the default there.
        value = row.get(key)
        return default if value is None else value

    def _parse_csv_file(self, csv_path: Path) -> List[Dict[str, Any]]:
        """
        Parse a single game CSV file into move records.

        Integer columns (``turn``, ``player``, ``board_size``) fall back to
        their defaults (0, 0, 10) when a cell is empty or not a valid integer,
        so one bad cell does not discard the remainder of the game.

        Args:
            csv_path: Path of the CSV file to read.

        Returns:
            One dict per CSV row. If the file cannot be opened, decoded or
            parsed, an error line is printed and the records collected so far
            (possibly none) are returned.
        """
        moves: List[Dict[str, Any]] = []
        game_id = csv_path.stem
        to_int = self._to_int
        text = self._text
        try:
            # newline="" lets the csv module handle line endings itself, which
            # keeps quoted fields containing embedded newlines intact.
            with open(csv_path, "r", encoding="utf-8", newline="") as f:
                for row in csv.DictReader(f):
                    moves.append(
                        {
                            "game_id": game_id,
                            "turn": to_int(row.get("turn"), 0),
                            "player": to_int(row.get("player"), 0),
                            "model_name": text(row, "model_name", "unknown"),
                            "move": text(row, "move"),
                            "from_pos": text(row, "from_pos"),
                            "to_pos": text(row, "to_pos"),
                            "piece_type": text(row, "piece_type"),
                            # New training-relevant fields
                            "board_state": text(row, "board_state"),
                            "available_moves": text(row, "available_moves"),
                            "move_direction": text(row, "move_direction"),
                            "target_piece": text(row, "target_piece"),
                            "battle_outcome": text(row, "battle_outcome"),
                            "prompt_name": text(row, "prompt_name"),
                            "game_type": text(row, "game_type", "standard"),
                            "board_size": to_int(row.get("board_size"), 10),
                            "game_winner": text(row, "game_winner"),
                            "game_result": text(row, "game_result"),
                        }
                    )
        except (OSError, UnicodeError, csv.Error) as e:
            # Best effort: report the unreadable file and keep what was parsed.
            print(f"Error parsing {csv_path}: {e}")
        return moves

    def scan_logs(self) -> int:
        """
        Scan the logs directory and load all CSV files into ``self.moves``.

        Any previously loaded moves are discarded first. Files are processed
        in sorted path order so the resulting row order is reproducible.

        Returns:
            The number of CSV files found (0 if the directory does not exist).
        """
        self.moves = []
        if not self.logs_dir.exists():
            return 0
        csv_files = sorted(self.logs_dir.glob("*.csv"))
        for csv_path in csv_files:
            self.moves.extend(self._parse_csv_file(csv_path))
        return len(csv_files)

    def build(self) -> "Dataset":
        """
        Build a Dataset from all game logs.

        Scans the logs directory first if no moves are currently loaded.

        Returns:
            A ``Dataset`` with one row per move record.

        Raises:
            ValueError: If no moves are available even after scanning.
        """
        if not self.moves:
            self.scan_logs()
        if not self.moves:
            raise ValueError("No moves found in logs directory.")
        return Dataset.from_list(self.moves)
def build_dataset_from_logs(logs_dir: str = "logs/games") -> "Dataset":
    """
    Convenience wrapper: scan a logs directory and return its moves as a Dataset.

    Args:
        logs_dir: Directory holding the per-game CSV files.

    Returns:
        The Dataset produced by ``GameDatasetBuilder.build()`` after an
        explicit ``scan_logs()`` pass over ``logs_dir``.
    """
    dataset_builder = GameDatasetBuilder(logs_dir)
    # Load every CSV up front; build() then converts the collected moves.
    dataset_builder.scan_logs()
    dataset = dataset_builder.build()
    return dataset
|