Spaces:
Running
Running
| from __future__ import annotations | |
| import re | |
| import unicodedata | |
# Alias data grouped by canonical name: each row is
# (canonical_name, (raw_spelling, ...)).  Canonical names are lowercase ASCII
# with no periods.  Rows, and the spellings inside each row, are listed in the
# exact order the keys appear in NAME_ALIASES.
_ALIAS_GROUPS = (
    # ---- Batters ----
    ("shohei ohtani", ("shohei ohtani", "shōhei ohtani")),
    ("seiya suzuki", ("seiya suzuki",)),
    ("masataka yoshida", ("masataka yoshida",)),
    ("mookie betts", ("mookie betts",)),
    ("fernando tatis jr", ("fernando tatis jr", "fernando tatis jr.")),
    ("vladimir guerrero jr", ("vladimir guerrero jr", "vladimir guerrero jr.")),
    (
        "ronald acuna jr",
        (
            "ronald acuña jr",
            "ronald acuna jr",
            "ronald acuña jr.",
            "ronald acuna jr.",
        ),
    ),
    ("juan soto", ("juan soto",)),
    ("julio rodriguez", ("julio rodriguez", "julio rodríguez")),
    ("jose ramirez", ("jose ramirez", "josé ramírez")),
    ("yordan alvarez", ("yordan alvarez", "yordan álvarez")),
    ("luis robert jr", ("luis robert jr", "luis robert jr.")),
    ("bo bichette", ("bo bichette",)),
    ("manny machado", ("manny machado",)),
    ("xander bogaerts", ("xander bogaerts",)),
    ("rafael devers", ("rafael devers",)),
    ("ketel marte", ("ketel marte",)),
    ("isaac paredes", ("isaac paredes",)),
    ("andy pages", ("andy pages",)),
    # ---- Pitchers ----
    # Jr./Sr. variants — canonical form keeps suffix when that is how statcast
    # stores the name.  The suffix-less spellings are included because odds
    # APIs sometimes omit Jr.
    (
        "nestor cortes jr",
        (
            "nestor cortes jr",
            "nestor cortes",
            "néstor cortés jr",
            "néstor cortés",
        ),
    ),
    # International names with diacritics commonly mis-encoded by odds sources
    ("framber valdez", ("framber valdéz", "framber valdez")),
    ("sandy alcantara", ("sandy alcántara", "sandy alcantara")),
    ("pablo lopez", ("pablo lópez", "pablo lopez")),
    ("ranger suarez", ("ranger suárez", "ranger suarez")),
    ("jose berrios", ("josé berríos", "jose berrios")),
    ("jose quintana", ("josé quintana", "jose quintana")),
    ("martin perez", ("martín pérez", "martin perez")),
    ("eduardo rodriguez", ("eduardo rodríguez", "eduardo rodriguez")),
    ("cristopher sanchez", ("cristopher sánchez", "cristopher sanchez")),
    ("adrian houser", ("adrián houser", "adrian houser")),
    ("julio urias", ("julio urías", "julio urias")),
    ("yonny chirinos", ("yonny chirinos",)),
    ("yusei kikuchi", ("yusei kikuchi",)),
    ("yoshinobu yamamoto", ("yoshinobu yamamoto",)),
    ("kodai senga", ("kodai senga",)),
    ("shota imanaga", ("shōta imanaga", "shota imanaga")),
)

# Flat lookup table: raw spelling -> canonical name.
NAME_ALIASES = {
    spelling: canonical
    for canonical, spellings in _ALIAS_GROUPS
    for spelling in spellings
}
def _strip_accents(text: str) -> str:
    """Return *text* with all combining marks removed.

    The string is first decomposed with Unicode NFKD, which splits accented
    letters into a base character followed by combining marks (and also
    applies compatibility mappings, e.g. ligatures to their letters).  Every
    character with a non-zero canonical combining class is then dropped.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    base_chars = [c for c in decomposed if unicodedata.combining(c) == 0]
    return "".join(base_chars)
def normalize_player_name(name: str) -> str:
    """Return the canonical lookup form of a player name.

    Pipeline:
      1. Coerce to ``str`` (any falsy input such as ``None`` or ``""``
         becomes the empty string) and lowercase.
      2. Strip accents via ``_strip_accents``.
      3. Replace the typographic apostrophe (U+2019) with ``'``.
      4. Remove every period (``"jr."`` -> ``"jr"``, ``"j.d."`` -> ``"jd"``).
      5. Collapse each whitespace run to one space and trim both ends.
      6. Map the result through ``NAME_ALIASES``; names without an alias are
         returned as cleaned in steps 1-5.

    Args:
        name: Raw player name as supplied by a data source.

    Returns:
        The aliased canonical name, or the cleaned name when no alias exists.
    """
    text = _strip_accents(str(name or "").lower())
    text = text.replace("’", "'").replace(".", "")
    # Trim only AFTER punctuation removal: deleting a period can expose a
    # leading/trailing space (e.g. "Tatis Jr ." -> "tatis jr "), which would
    # otherwise leak into the result and miss the alias table.
    text = re.sub(r"\s+", " ", text).strip()
    # The lookup key is accent-free and period-free at this point, so only
    # NAME_ALIASES keys of that shape can ever match here.
    return NAME_ALIASES.get(text, text)
def normalize_pitcher_name(name: str) -> str:
    """Return the canonical form of a pitcher name.

    This is a pitcher-side entry point that delegates directly to
    normalize_player_name(); the result is whatever that function returns
    for the same input.
    """
    canonical = normalize_player_name(name)
    return canonical
def map_odds_name_to_model_name(name: str) -> str:
    """Translate a name as given by an odds source into the model's name form.

    Delegates directly to normalize_player_name(); the result is whatever
    that function returns for the same input.
    """
    model_name = normalize_player_name(name)
    return model_name