DarshanScripts committed on
Commit
1ec9780
·
verified ·
1 Parent(s): 390fdeb

Upload stratego/benchmarking/metrics.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. stratego/benchmarking/metrics.py +101 -0
stratego/benchmarking/metrics.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # stratego/benchmarking/metrics.py
2
+
3
def init_metrics():
    """Return a fresh, empty metrics accumulator.

    Every counter starts at ``0`` and the per-game series (``"turns"`` and
    ``"repetitions"``) start as new empty lists, so separate calls never
    share mutable state.
    """
    metrics = {}

    # Game totals and end-of-game reason counters.
    for counter in (
        "games",
        "wins_p0",
        "wins_p1",
        "draws",
        "end_draw",
        "end_invalid",
        "end_flag",
        "end_no_moves",
        "end_turn_limit",
    ):
        metrics[counter] = 0

    # One entry per recorded game.
    metrics["turns"] = []

    # Running totals of invalid moves per player.
    metrics["invalid_p0"] = 0
    metrics["invalid_p1"] = 0

    # One entry per recorded game.
    metrics["repetitions"] = []
    return metrics
19
+
20
+
21
def _classify_end_reason(reason_lower, is_draw, flag_captured):
    """Return the ``end_*`` metrics key that one finished game increments.

    Args:
        reason_lower: Lower-cased end-of-game reason text ("" if unknown).
        is_draw: True when the game had no winner (winner is not 0 or 1).
        flag_captured: True when the result explicitly reports a flag capture.
    """
    # An explicit flag capture (or a reason mentioning it) wins over
    # every other signal.
    if flag_captured or "flag" in reason_lower:
        return "end_flag"
    if "invalid" in reason_lower:
        return "end_invalid"

    out_of_moves = any(
        phrase in reason_lower
        for phrase in ("no legal", "no more movable pieces", "no moves")
    )

    if is_draw:
        if out_of_moves:
            return "end_no_moves"
        if "turn limit" in reason_lower:
            return "end_turn_limit"
        # Explicit draw / repetition / stalemate, and any unknown draw reason.
        return "end_draw"

    # Decisive game. A reason mentioning repetition is bucketed with
    # "no moves" here, as the original code did.
    # NOTE(review): confirm that a won game ending by repetition really
    # belongs under end_no_moves.
    if out_of_moves or "repetition" in reason_lower:
        return "end_no_moves"
    if "turn limit" in reason_lower:
        return "end_turn_limit"
    # A winner without a recognizable reason: flag capture is already ruled
    # out above, so fall back to "no moves".
    return "end_no_moves"


def update_metrics(m, r):
    """Fold the result of one finished game into the accumulator ``m``.

    Args:
        m: Metrics dict with the keys produced by ``init_metrics``; it is
            updated in place.
        r: Result dict for one game. ``"turns"``, ``"invalid_moves_p0"``,
            ``"invalid_moves_p1"`` and ``"repetitions"`` are required.
            ``"winner"`` (0 or 1 for a win; anything else, or missing, is a
            draw), ``"game_end_reason"`` and ``"flag_captured"`` are optional.

    Raises:
        KeyError: If a required key is missing from ``r``. The required keys
            are read before any counter changes, so ``m`` is left untouched.
    """
    # Read required fields first so a malformed result cannot leave the
    # accumulator half-updated.
    turns = r["turns"]
    invalid_p0 = r["invalid_moves_p0"]
    invalid_p1 = r["invalid_moves_p1"]
    repetitions = r["repetitions"]

    winner = r.get("winner", -1)
    reason_lower = (r.get("game_end_reason") or "").lower()
    flag_captured = r.get("flag_captured", False)

    # One definition of "draw" drives both the win/draw tally and the
    # end-reason bucket, so the two can never disagree (e.g. winner=None).
    is_draw = winner not in (0, 1)

    m["games"] += 1
    if is_draw:
        m["draws"] += 1
    elif winner == 0:
        m["wins_p0"] += 1
    else:
        m["wins_p1"] += 1

    m[_classify_end_reason(reason_lower, is_draw, flag_captured)] += 1

    m["turns"].append(turns)
    m["invalid_p0"] += invalid_p0
    m["invalid_p1"] += invalid_p1
    m["repetitions"].append(repetitions)
81
+
82
+
83
def summarize(m):
    """Build a report dict from the accumulated metrics ``m``.

    Args:
        m: Metrics dict with the keys produced by ``init_metrics`` and filled
            in by ``update_metrics``.

    Returns:
        A dict of display-labelled totals, win rates, per-game averages and
        end-reason counts. ``"Games"`` is the real number of recorded games
        (0 for an empty run); rates and averages are 0 when no games were
        recorded.
    """
    games = m["games"]
    # Clamp only the divisor: this avoids ZeroDivisionError on an empty run
    # without misreporting the game count as 1.
    denom = max(1, games)
    return {
        "Games": games,
        "Wins P0": m["wins_p0"],
        "Wins P1": m["wins_p1"],
        "Draws": m["draws"],
        "Win Rate P0": m["wins_p0"] / denom,
        "Win Rate P1": m["wins_p1"] / denom,
        "Avg Turns": sum(m["turns"]) / denom,
        "Avg Invalid Moves P0": m["invalid_p0"] / denom,
        "Avg Invalid Moves P1": m["invalid_p1"] / denom,
        "Avg Repetitions": sum(m["repetitions"]) / denom,
        "Ended by Invalid": m["end_invalid"],
        "Ended by Flag": m["end_flag"],
        "Ended by Draw": m["end_draw"],
        "Ended by No Moves": m["end_no_moves"],
        "Ended by Turn Limit": m["end_turn_limit"],
    }