Spaces:
Sleeping
Sleeping
Update models/batter_zone_model.py
Browse files- models/batter_zone_model.py +32 -1
models/batter_zone_model.py
CHANGED
|
@@ -3,7 +3,8 @@ from __future__ import annotations
|
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
import pandas as pd
|
| 6 |
-
|
|
|
|
| 7 |
|
| 8 |
PITCH_FAMILY_MAP = {
|
| 9 |
"4-seam fastball": "fastball",
|
|
@@ -25,6 +26,36 @@ PITCH_FAMILY_MAP = {
|
|
| 25 |
"circle change": "offspeed",
|
| 26 |
}
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def _safe_mean(series: pd.Series) -> float | None:
|
| 30 |
numeric = pd.to_numeric(series, errors="coerce").dropna()
|
|
|
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
import pandas as pd
|
| 6 |
+
import re
|
| 7 |
+
import unicodedata
|
| 8 |
|
| 9 |
PITCH_FAMILY_MAP = {
|
| 10 |
"4-seam fastball": "fastball",
|
|
|
|
| 26 |
"circle change": "offspeed",
|
| 27 |
}
|
| 28 |
|
| 29 |
+
def _normalize_name_text(name: str) -> str:
|
| 30 |
+
text = str(name or "").strip().lower()
|
| 31 |
+
|
| 32 |
+
text = unicodedata.normalize("NFKD", text)
|
| 33 |
+
text = "".join(ch for ch in text if not unicodedata.combining(ch))
|
| 34 |
+
|
| 35 |
+
text = text.replace(",", " ")
|
| 36 |
+
text = re.sub(r"\s+", " ", text).strip()
|
| 37 |
+
|
| 38 |
+
return text
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _to_last_first_variants(name: str) -> set[str]:
    """Build the set of normalized spellings of *name* with the last token moved first.

    The name is first passed through ``_normalize_name_text``. An empty
    result yields an empty set. Otherwise the set always contains the
    normalized text itself; when it has at least two tokens it also
    contains ``"<last> <first>"`` and, if there are tokens in between,
    ``"<last> <first> <middle tokens>"``.
    """
    canonical = _normalize_name_text(name)
    if not canonical:
        return set()

    result = {canonical}
    tokens = canonical.split()

    # A single token has nothing to reorder.
    if len(tokens) < 2:
        return result

    head, *between, tail = tokens
    result.add(f"{tail} {head}")

    middle = " ".join(between)
    if middle:
        result.add(f"{tail} {head} {middle}")

    return result
|
| 59 |
|
| 60 |
def _safe_mean(series: pd.Series) -> float | None:
|
| 61 |
numeric = pd.to_numeric(series, errors="coerce").dropna()
|