Spaces:

DarshanScripts
/

stratego-human-vs-ai

Configuration error

App Files Files Community

DarshanScripts committed 22 days ago

Commit

1ec9780

verified ·

1 Parent(s): 390fdeb

Upload stratego/benchmarking/metrics.py with huggingface_hub

Browse files

Files changed (1) hide show

stratego/benchmarking/metrics.py +101 -0

stratego/benchmarking/metrics.py ADDED Viewed

	@@ -0,0 +1,101 @@

+# stratego/benchmarking/metrics.py
def init_metrics():
    """Create an empty accumulator for benchmark results.

    Returns:
        dict: A new metrics dictionary on every call. Integer counters start
        at ``0``; the per-game series ``"turns"`` and ``"repetitions"`` start
        as fresh empty lists, so two accumulators never share state.
    """
    return {
        # Outcome tallies.
        "games": 0,
        "wins_p0": 0,
        "wins_p1": 0,
        "draws": 0,
        # How each game ended (one of these is bumped per game).
        "end_draw": 0,
        "end_invalid": 0,
        "end_flag": 0,
        "end_no_moves": 0,
        "end_turn_limit": 0,
        # Per-game series and invalid-move totals.
        "turns": [],
        "invalid_p0": 0,
        "invalid_p1": 0,
        "repetitions": [],
    }
# Substrings of a lower-cased end reason that mean "a player could not move".
_NO_MOVES_MARKERS = ("no legal", "no more movable pieces", "no moves")


def _classify_end_reason(reason_lower, is_draw):
    """Map a lower-cased, non-flag end reason to its ``end_*`` counter key."""
    if "invalid" in reason_lower:
        return "end_invalid"
    # For decisive games a repetition reason is filed under "no moves", and it
    # takes precedence over the turn-limit check below.
    # NOTE(review): confirm with the game engine that a repetition loss is
    # really meant to be reported as "no moves".
    if not is_draw and "repetition" in reason_lower:
        return "end_no_moves"
    if any(marker in reason_lower for marker in _NO_MOVES_MARKERS):
        return "end_no_moves"
    if "turn limit" in reason_lower:
        return "end_turn_limit"
    # Anything else (explicit draw/repetition/stalemate text or an unknown
    # reason) falls back to the bucket matching the game result.
    return "end_draw" if is_draw else "end_no_moves"


def update_metrics(m, r):
    """Fold one game result into the metrics accumulator ``m`` (in place).

    Args:
        m: Metrics dictionary with the counter and list keys this function
            updates (``"games"``, ``"wins_p0"``, ``"wins_p1"``, ``"draws"``,
            the ``"end_*"`` counters, ``"turns"``, ``"invalid_p0"``,
            ``"invalid_p1"``, ``"repetitions"``).
        r: Result mapping for a single game. Required keys:
            ``"game_end_reason"`` (string or ``None``), ``"turns"``,
            ``"invalid_moves_p0"``, ``"invalid_moves_p1"``, ``"repetitions"``.
            Optional keys: ``"winner"`` (``0`` or ``1``; anything else,
            including a missing key, is treated as a draw) and
            ``"flag_captured"`` (defaults to ``False``).

    Raises:
        KeyError: If a required key is missing from ``r``. All required keys
            are read before ``m`` is touched, so ``m`` is left unchanged.
    """
    # Read every required field first so a malformed record cannot leave the
    # accumulator half-updated.
    reason_lower = (r["game_end_reason"] or "").lower()
    turns = r["turns"]
    invalid_p0 = r["invalid_moves_p0"]
    invalid_p1 = r["invalid_moves_p1"]
    repetitions = r["repetitions"]

    # A single definition of "draw" drives both the win/draw tally and the
    # end-reason bucket, so the two can never disagree.
    winner = r.get("winner", -1)
    is_draw = winner not in (0, 1)

    m["games"] += 1
    if winner == 0:
        m["wins_p0"] += 1
    elif winner == 1:
        m["wins_p1"] += 1
    else:
        m["draws"] += 1

    # A captured flag (explicit signal or mentioned in the reason) wins over
    # every other classification.
    if r.get("flag_captured", False) or "flag" in reason_lower:
        end_key = "end_flag"
    else:
        end_key = _classify_end_reason(reason_lower, is_draw)
    m[end_key] += 1

    m["turns"].append(turns)
    m["invalid_p0"] += invalid_p0
    m["invalid_p1"] += invalid_p1
    m["repetitions"].append(repetitions)
def summarize(m):
    """Build a human-readable summary of an accumulated metrics dictionary.

    Args:
        m: Metrics dictionary holding the counters ``"games"``, ``"wins_p0"``,
            ``"wins_p1"``, ``"draws"``, ``"invalid_p0"``, ``"invalid_p1"``,
            the ``"end_*"`` counters, and the numeric lists ``"turns"`` and
            ``"repetitions"``.

    Returns:
        dict: Totals, win rates and per-game averages keyed by display label.
        With zero recorded games, ``"Games"`` is ``0`` and every rate or
        average is computed against a divisor of 1.
    """
    games = m["games"]
    # Floor the divisor at 1 so an empty accumulator does not raise
    # ZeroDivisionError; the reported game count itself stays truthful.
    divisor = max(1, games)
    return {
        "Games": games,
        "Wins P0": m["wins_p0"],
        "Wins P1": m["wins_p1"],
        "Draws": m["draws"],
        "Win Rate P0": m["wins_p0"] / divisor,
        "Win Rate P1": m["wins_p1"] / divisor,
        "Avg Turns": sum(m["turns"]) / divisor,
        "Avg Invalid Moves P0": m["invalid_p0"] / divisor,
        "Avg Invalid Moves P1": m["invalid_p1"] / divisor,
        "Avg Repetitions": sum(m["repetitions"]) / divisor,
        "Ended by Invalid": m["end_invalid"],
        "Ended by Flag": m["end_flag"],
        "Ended by Draw": m["end_draw"],
        "Ended by No Moves": m["end_no_moves"],
        "Ended by Turn Limit": m["end_turn_limit"],
    }