File size: 3,825 Bytes
2fc45a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# stratego/datasets/builder.py
"""

Build Hugging Face Datasets from Stratego game CSV logs.

"""

from __future__ import annotations
import os
import csv
from pathlib import Path
from typing import Optional, List, Dict, Any

try:
    from datasets import Dataset
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False
    Dataset = None


class GameDatasetBuilder:
    """Builds a Hugging Face Dataset from CSV game logs.

    Every ``*.csv`` file directly inside ``logs_dir`` is read as one game:
    the file stem becomes ``game_id`` and each CSV row becomes one move
    record (a flat dict). The records are accumulated in ``self.moves``.
    """

    def __init__(self, logs_dir: str = "logs/games"):
        """Create a builder that reads game logs from ``logs_dir``.

        Args:
            logs_dir: Directory containing the per-game CSV files.

        Raises:
            ImportError: If the ``datasets`` package could not be imported.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "Hugging Face datasets not installed. "
                "Run: pip install datasets huggingface_hub"
            )
        self.logs_dir = Path(logs_dir)
        # Flat list of move records from the most recent scan_logs() call.
        self.moves: List[Dict[str, Any]] = []

    @staticmethod
    def _text(row: Dict[str, Any], key: str, default: str = "") -> str:
        """Return ``row[key]``, or ``default`` when the cell is absent.

        ``csv.DictReader`` fills the missing trailing cells of a short row
        with ``None``, so ``row.get(key, default)`` alone would let ``None``
        through; both "no such column" and "no such cell" map to ``default``.
        """
        value = row.get(key)
        return default if value is None else value

    @staticmethod
    def _to_int(value: Optional[str], default: int) -> int:
        """Convert a CSV cell to ``int``; missing or blank cells give ``default``.

        Raises:
            ValueError: If the cell holds non-blank text that is not an integer.
        """
        if value is None:
            return default
        text = str(value).strip()
        return int(text) if text else default

    def _row_to_record(self, game_id: str, row: Dict[str, Any]) -> Dict[str, Any]:
        """Map one CSV row onto the move-record schema.

        Raises:
            ValueError: If ``turn``, ``player`` or ``board_size`` is not an integer.
        """
        text = self._text
        to_int = self._to_int
        return {
            "game_id": game_id,
            "turn": to_int(row.get("turn"), 0),
            "player": to_int(row.get("player"), 0),
            "model_name": text(row, "model_name", "unknown"),
            "move": text(row, "move"),
            "from_pos": text(row, "from_pos"),
            "to_pos": text(row, "to_pos"),
            "piece_type": text(row, "piece_type"),
            # Training-relevant fields; default to "" when the column is absent.
            "board_state": text(row, "board_state"),
            "available_moves": text(row, "available_moves"),
            "move_direction": text(row, "move_direction"),
            "target_piece": text(row, "target_piece"),
            "battle_outcome": text(row, "battle_outcome"),
            "prompt_name": text(row, "prompt_name"),
            "game_type": text(row, "game_type", "standard"),
            "board_size": to_int(row.get("board_size"), 10),
            "game_winner": text(row, "game_winner"),
            "game_result": text(row, "game_result"),
        }

    def _parse_csv_file(self, csv_path: Path) -> List[Dict[str, Any]]:
        """Parse a single game CSV file into move records.

        Parsing is best-effort: a row whose numeric cells cannot be converted
        is reported and skipped, and a file-level failure (unreadable file,
        bad encoding, CSV syntax error) is reported and ends parsing of that
        file. In both cases the records collected so far are returned.

        Args:
            csv_path: Path to the CSV log; its stem is used as ``game_id``.

        Returns:
            The move records parsed from the file (possibly empty).
        """
        moves: List[Dict[str, Any]] = []
        game_id = csv_path.stem

        try:
            # newline="" lets the csv module handle line endings itself, which
            # is required for quoted cells that contain embedded newlines.
            with open(csv_path, "r", encoding="utf-8", newline="") as f:
                reader = csv.DictReader(f)
                for row in reader:
                    try:
                        record = self._row_to_record(game_id, row)
                    except ValueError as e:
                        # One bad row must not discard the rest of the game.
                        print(
                            f"Skipping malformed row at line {reader.line_num} "
                            f"in {csv_path}: {e}"
                        )
                        continue
                    moves.append(record)
        except Exception as e:
            # Deliberate best-effort boundary: report and keep what was parsed.
            print(f"Error parsing {csv_path}: {e}")

        return moves

    def scan_logs(self) -> int:
        """Scan the logs directory and (re)load all CSV files into ``self.moves``.

        Any previously loaded moves are discarded first. Files are processed
        in sorted path order so the resulting row order is reproducible.

        Returns:
            The number of CSV files found (0 if the directory does not exist).
        """
        self.moves = []

        if not self.logs_dir.exists():
            return 0

        # glob() yields entries in arbitrary order; sort for determinism.
        csv_files = sorted(self.logs_dir.glob("*.csv"))

        for csv_path in csv_files:
            self.moves.extend(self._parse_csv_file(csv_path))

        return len(csv_files)

    def build(self) -> "Dataset":
        """Build a Dataset from all game logs.

        Scans the logs directory first if no moves are currently loaded.

        Returns:
            A ``Dataset`` created from the loaded move records.

        Raises:
            ValueError: If no moves are available even after scanning.
        """
        if not self.moves:
            self.scan_logs()

        if not self.moves:
            raise ValueError("No moves found in logs directory.")

        return Dataset.from_list(self.moves)


def build_dataset_from_logs(logs_dir: str = "logs/games") -> "Dataset":
    """Build a dataset from game logs.

    Convenience wrapper: creates a ``GameDatasetBuilder`` for ``logs_dir``,
    scans the directory, and returns the built dataset.

    Args:
        logs_dir: Path to directory containing game CSV files

    Returns:
        Dataset with all moves
    """
    dataset_builder = GameDatasetBuilder(logs_dir)
    # Load the CSV logs explicitly before asking for the dataset.
    dataset_builder.scan_logs()
    dataset = dataset_builder.build()
    return dataset