Syntrex committed on
Commit
c32052f
·
verified ·
1 Parent(s): 4c2f97d

Update models/batter_zone_model.py

Browse files
Files changed (1) hide show
  1. models/batter_zone_model.py +32 -1
models/batter_zone_model.py CHANGED
@@ -3,7 +3,8 @@ from __future__ import annotations
3
  from typing import Any
4
 
5
  import pandas as pd
6
-
 
7
 
8
  PITCH_FAMILY_MAP = {
9
  "4-seam fastball": "fastball",
@@ -25,6 +26,36 @@ PITCH_FAMILY_MAP = {
25
  "circle change": "offspeed",
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def _safe_mean(series: pd.Series) -> float | None:
30
  numeric = pd.to_numeric(series, errors="coerce").dropna()
 
3
  from typing import Any
4
 
5
  import pandas as pd
6
+ import re
7
+ import unicodedata
8
 
9
  PITCH_FAMILY_MAP = {
10
  "4-seam fastball": "fastball",
 
26
  "circle change": "offspeed",
27
  }
28
 
29
def _normalize_name_text(name: str) -> str:
    """Return a lowercase, accent-free, single-spaced form of *name*.

    Processing steps, in order:
      1. Falsy inputs (``None``, ``""``) and float NaN become ``""``.
      2. The value is converted with ``str()``, stripped and lowercased.
      3. The text is NFKD-decomposed and combining marks are removed,
         so ``"ñ"`` becomes ``"n"``.
      4. Commas are replaced by spaces.
      5. Every whitespace run collapses to one space and the ends are
         stripped.

    Args:
        name: The raw name. Annotated as ``str``, but non-string values
            are tolerated and passed through ``str()``.

    Returns:
        The normalized text, or ``""`` when there is nothing to normalize.
    """
    # A float NaN is truthy, so ``name or ""`` would not catch it and
    # ``str()`` would produce the literal text "nan". NaN is the only
    # float that is not equal to itself.
    if isinstance(name, float) and name != name:
        return ""

    text = str(name or "").strip().lower()

    # NFKD splits an accented letter into base letter + combining mark;
    # dropping the marks leaves only the base letters.
    decomposed = unicodedata.normalize("NFKD", text)
    text = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Commas act purely as separators (e.g. "Last, First").
    text = text.replace(",", " ")
    return re.sub(r"\s+", " ", text).strip()
39
+
40
+
41
def _to_last_first_variants(name: str) -> set[str]:
    """Build the set of normalized spellings of *name* with the last token moved first.

    The name is first passed through ``_normalize_name_text``. The
    result always contains that normalized text. When it has at least
    two whitespace-separated tokens, ``"<last> <first>"`` is added as
    well, and when there are tokens between the first and the last,
    ``"<last> <first> <middle tokens>"`` is added too.

    Args:
        name: The raw name to expand.

    Returns:
        A set of normalized variants; empty when *name* normalizes to
        an empty string.
    """
    cleaned = _normalize_name_text(name)
    if not cleaned:
        return set()

    results = {cleaned}
    tokens = cleaned.split()

    # A single token cannot be reordered.
    if len(tokens) < 2:
        return results

    given, *between, family = tokens
    results.add(" ".join((family, given)))
    if between:
        results.add(" ".join((family, given, *between)))

    return results
59
 
60
  def _safe_mean(series: pd.Series) -> float | None:
61
  numeric = pd.to_numeric(series, errors="coerce").dropna()