dota2tuned / scripts /audit_confidence.py
Dracufeuer's picture
Sync confidence expansion gate
f79d69c verified
Raw
History Blame Contribute Delete
6.19 kB
from __future__ import annotations
import argparse
import json
import sys
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
import polars as pl
from dota2tuned.recommend import DraftRecommender
from dota2tuned.schemas import DraftInput
# Role filters audited as separate recommendation surfaces; ``None`` means
# "no role filter" and is reported under the label "any".
ROLES = [
    None,
    "carry",
    "mid",
    "offlane",
    "soft support",
    "hard support",
]
def _confidence(sample_size: int, threshold: int, *, medium_threshold: int = 100) -> str:
    """Classify a sample size into a confidence tier.

    Args:
        sample_size: Number of observations backing a hero.
        threshold: Minimum sample size for the ``"high"`` tier.
        medium_threshold: Minimum sample size for the ``"medium"`` tier.
            Defaults to 100, the value that used to be hard-coded here.

    Returns:
        ``"high"``, ``"medium"`` or ``"low"``.
    """
    # "high" is tested first, so when threshold <= medium_threshold the
    # medium tier is simply never reached for samples at or above threshold.
    if sample_size >= threshold:
        return "high"
    if sample_size >= medium_threshold:
        return "medium"
    return "low"
def _read_parquet(path: Path) -> pl.DataFrame:
    """Load the parquet file at *path* into a polars DataFrame.

    Raises:
        FileNotFoundError: If *path* does not exist; the message names the
            missing file.
    """
    if path.exists():
        return pl.read_parquet(path)
    raise FileNotFoundError(f"missing parquet file: {path}")
def _hero_samples(parquet_dir: Path, threshold: int) -> tuple[dict[str, Any], pl.DataFrame]:
    """Compute per-hero sample sizes and a confidence summary.

    Reads ``dim_hero.parquet`` and ``fact_player_match.parquet`` from
    *parquet_dir*. A hero's ``sample_size`` is the larger of its row count in
    the player-match table (``player_games``) and its ``pro_pick`` value
    (treated as 0 when the hero table has no such column).

    Args:
        parquet_dir: Directory containing the two parquet files.
        threshold: Minimum sample size for the ``"high"`` confidence tier.

    Returns:
        ``(summary, base)`` where ``base`` has one row per hero with the
        columns ``hero_id``, ``hero_name``, ``pro_pick``, ``player_games``,
        ``sample_size`` and ``confidence``, and ``summary`` is a dict with
        the hero count, per-tier counts, sample-size distribution
        (``min``/``p25``/``median``/``p75``/``max``), ``threshold`` and
        ``max_confidence``.

    Raises:
        FileNotFoundError: If either parquet file is missing.
    """
    heroes = _read_parquet(parquet_dir / "dim_hero.parquet")
    players = _read_parquet(parquet_dir / "fact_player_match.parquet")
    if "pro_pick" not in heroes.columns:
        heroes = heroes.with_columns(pl.lit(0).alias("pro_pick"))
    # One row per hero_id with the number of player-match rows for that hero.
    counts = players.group_by("hero_id").agg(pl.len().cast(pl.Int64).alias("player_games"))
    base = (
        heroes.select(["hero_id", "hero_name", "pro_pick"])
        .join(counts, on="hero_id", how="left")
        # Heroes absent from the player-match table get 0 games, not null.
        .with_columns(pl.col("player_games").fill_null(0))
        .with_columns(pl.max_horizontal("player_games", "pro_pick").alias("sample_size"))
        .with_columns(
            pl.col("sample_size")
            .map_elements(lambda value: _confidence(int(value), threshold), return_dtype=pl.String)
            .alias("confidence")
        )
    )
    sample = pl.col("sample_size")
    tier = pl.col("confidence")
    summary = base.select(
        [
            pl.len().alias("heroes"),
            # Count tiers from the labels assigned above instead of repeating
            # the cut-offs here: re-deriving them with a literal 100 counted
            # heroes in [threshold, 100) as both "high" and "low" whenever
            # threshold < 100, which made max_confidence wrongly False.
            (tier == "high").sum().alias("high"),
            (tier == "medium").sum().alias("medium"),
            (tier == "low").sum().alias("low"),
            sample.min().alias("min"),
            sample.quantile(0.25).alias("p25"),
            sample.median().alias("median"),
            sample.quantile(0.75).alias("p75"),
            sample.max().alias("max"),
        ]
    ).to_dicts()[0]
    summary["threshold"] = threshold
    # An empty hero table must not pass the gate vacuously (same guard as
    # ``all_high`` on the recommendation surfaces).
    summary["max_confidence"] = (
        summary["heroes"] > 0 and summary["low"] == 0 and summary["medium"] == 0
    )
    return summary, base
def _recommendation_surfaces(parquet_dir: Path, threshold: int) -> list[dict[str, Any]]:
    """Audit the sample sizes behind a fixed set of recommendation queries.

    One query is issued per entry in ``ROLES`` (the ``None`` entry is labelled
    ``"any"``), plus one demo draft against two enemy heroes. Up to eight
    recommendations are requested for each.

    Args:
        parquet_dir: Directory handed to ``DraftRecommender``.
        threshold: Sample size a recommendation must reach to count as high.

    Returns:
        One dict per surface with ``surface`` (label), ``all_high`` (True only
        when at least one recommendation came back and none is below
        *threshold*), ``min_sample`` (0 when nothing came back) and
        ``recommendations`` (hero name, sample size and the confidence label
        reported by the recommender itself).
    """
    engine = DraftRecommender(parquet_dir)

    drafts: list[tuple[str, DraftInput]] = []
    for role in ROLES:
        drafts.append((role or "any", DraftInput(role=role)))
    # Hero ids 44 and 30 are presumably Phantom Assassin and Witch Doctor,
    # going by the label -- confirm against dim_hero.
    demo_draft = DraftInput(enemy_heroes=[44, 30], role="mid", scope="pro", patch="current")
    drafts.append(("demo_pa_wd_mid", demo_draft))

    report = []
    for name, draft in drafts:
        picks = engine.recommend(draft, limit=8)
        has_low = any(pick.sample_size < threshold for pick in picks)
        entry = {
            "surface": name,
            # An empty result is never "all high".
            "all_high": (not has_low) and bool(picks),
            "min_sample": min((pick.sample_size for pick in picks), default=0),
            "recommendations": [
                {
                    "hero": pick.hero_name,
                    "sample_size": pick.sample_size,
                    "confidence": pick.confidence,
                }
                for pick in picks
            ],
        }
        report.append(entry)
    return report
def build_payload(parquet_dir: Path, threshold: int, lowest: int) -> dict[str, Any]:
    """Assemble the full audit result as a plain dict.

    Args:
        parquet_dir: Directory containing the parquet inputs.
        threshold: Sample size required for the "high" tier.
        lowest: Maximum number of below-threshold heroes to list.

    Returns:
        A dict with ``generated_at`` (UTC ISO-8601 timestamp), ``parquet_dir``
        (as a string), ``hero_samples`` (summary dict),
        ``lowest_sample_heroes`` (below-threshold heroes, smallest sample
        first, capped at *lowest*) and ``recommendation_surfaces``.
    """
    hero_summary, hero_frame = _hero_samples(parquet_dir, threshold)

    report_columns = [
        "hero_id",
        "hero_name",
        "player_games",
        "pro_pick",
        "sample_size",
        "confidence",
    ]
    below_threshold = hero_frame.filter(pl.col("sample_size") < threshold)
    weakest = below_threshold.sort("sample_size").select(report_columns).head(lowest)
    weakest_rows = weakest.to_dicts()

    surface_rows = _recommendation_surfaces(parquet_dir, threshold)

    payload: dict[str, Any] = {}
    payload["generated_at"] = datetime.now(UTC).isoformat()
    payload["parquet_dir"] = str(parquet_dir)
    payload["hero_samples"] = hero_summary
    payload["lowest_sample_heroes"] = weakest_rows
    payload["recommendation_surfaces"] = surface_rows
    return payload
def print_report(payload: dict[str, Any]) -> None:
    """Print a human-readable summary of an audit payload to stdout.

    Emits three sections: the hero confidence tier counts and sample-size
    spread, the lowest-sample heroes, and one line per recommendation surface
    showing its first three recommended heroes.

    Args:
        payload: Dict with the keys ``hero_samples``, ``lowest_sample_heroes``
            and ``recommendation_surfaces``, as returned by ``build_payload``.
    """
    stats = payload["hero_samples"]
    high, medium, low = stats["high"], stats["medium"], stats["low"]
    smallest, middle, largest = stats["min"], stats["median"], stats["max"]
    cutoff = stats["threshold"]
    print(
        f"hero confidence: high={high} medium={medium} low={low} "
        f"min={smallest} median={middle} max={largest} threshold={cutoff}"
    )
    print(f"max_confidence={stats['max_confidence']}")

    print("lowest sample heroes:")
    for hero in payload["lowest_sample_heroes"]:
        name = hero["hero_name"]
        sample = hero["sample_size"]
        games = hero["player_games"]
        pro_pick = hero["pro_pick"]
        tier = hero["confidence"]
        print(
            f"- {name}: sample={sample} players={games} "
            f"pro_pick={pro_pick} confidence={tier}"
        )

    print("recommendation surfaces:")
    for surface in payload["recommendation_surfaces"]:
        label = surface["surface"]
        all_high = surface["all_high"]
        min_sample = surface["min_sample"]
        # Only the first three recommendations are shown to keep lines short.
        top = [entry["hero"] for entry in surface["recommendations"][:3]]
        print(f"- {label}: all_high={all_high} min_sample={min_sample} top={top}")
def main() -> None:
    """Command-line entry point for the confidence audit.

    Parses the options, builds the audit payload, optionally writes it as
    indented JSON to ``--output`` (creating parent directories), prints the
    text report, and exits with status 1 when ``--fail-under-max`` is set and
    the hero summary does not report ``max_confidence``.
    """
    cli = argparse.ArgumentParser(description="Audit DOTA2Tuned recommendation confidence.")
    option_specs = (
        ("--parquet-dir", {"type": Path, "default": Path("data/parquet")}),
        ("--threshold", {"type": int, "default": 500}),
        ("--lowest", {"type": int, "default": 25}),
        ("--output", {"type": Path}),
        (
            "--fail-under-max",
            {
                "action": "store_true",
                "help": "Exit 1 unless every hero sample is at or above the threshold.",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    payload = build_payload(options.parquet_dir, options.threshold, options.lowest)

    destination = options.output
    if destination:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    print_report(payload)

    # The gate is evaluated after the report is printed so a failing run
    # still shows why it failed.
    gate_failed = options.fail_under_max and not payload["hero_samples"]["max_confidence"]
    if gate_failed:
        sys.exit(1)
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()