Update player_matcher.py
Browse files- player_matcher.py +66 -90
player_matcher.py
CHANGED
|
@@ -1,63 +1,43 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Player Name Matching Engine
|
| 3 |
-
Scans text for NBA player mentions and returns matched players.
|
| 4 |
-
"""
|
| 5 |
|
| 6 |
import re
|
| 7 |
from nba_players import NBA_PLAYERS
|
| 8 |
|
| 9 |
-
# Build
|
| 10 |
-
|
| 11 |
FIRST_NAMES = {}
|
| 12 |
LAST_NAMES = {}
|
| 13 |
|
| 14 |
for player in NBA_PLAYERS:
|
| 15 |
parts = player.split()
|
| 16 |
if len(parts) >= 2:
|
| 17 |
-
first = parts[0]
|
| 18 |
-
last
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
PLAYER_PATTERNS[player.lower()] = player
|
| 22 |
-
|
| 23 |
-
# Track first and last names for partial matching
|
| 24 |
-
if first.lower() not in FIRST_NAMES:
|
| 25 |
-
FIRST_NAMES[first.lower()] = []
|
| 26 |
-
FIRST_NAMES[first.lower()].append(player)
|
| 27 |
-
|
| 28 |
-
if last.lower() not in LAST_NAMES:
|
| 29 |
-
LAST_NAMES[last.lower()] = []
|
| 30 |
-
LAST_NAMES[last.lower()].append(player)
|
| 31 |
|
| 32 |
-
#
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
-
|
| 46 |
-
"
|
| 47 |
-
"mike", "anthony", "cooper", "tre", "cam", "max", "pat", "drew",
|
| 48 |
-
"jordan", "isaiah", "marcus", "jalen", "tyler", "ryan", "kyle",
|
| 49 |
-
"joe", "ben", "sam", "tim", "dan", "tom", "john", "david", "alex",
|
| 50 |
-
"james", "robert", "michael", "william", "richard", "charles",
|
| 51 |
-
"daniel", "matthew", "andrew", "joshua", "christopher", "joseph",
|
| 52 |
-
# Common words that match player names
|
| 53 |
-
"post", "bridges", "rose", "wolf", "wells", "rice", "prince",
|
| 54 |
-
"little", "green", "brown", "white", "black", "gray", "best",
|
| 55 |
-
"freeman", "washington", "boston", "houston", "phoenix", "orlando",
|
| 56 |
-
"brooks", "rivers", "banks", "stone", "wood", "waters", "field",
|
| 57 |
-
"hart", "fox", "dean", "terry", "craig", "ross", "lane", "day",
|
| 58 |
}
|
| 59 |
|
| 60 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
NICKNAMES = {
|
| 62 |
"lebron": "LeBron James",
|
| 63 |
"steph": "Stephen Curry",
|
|
@@ -67,7 +47,6 @@ NICKNAMES = {
|
|
| 67 |
"embiid": "Joel Embiid",
|
| 68 |
"kd": "Kevin Durant",
|
| 69 |
"ad": "Anthony Davis",
|
| 70 |
-
"pg": "Paul George",
|
| 71 |
"cp3": "Chris Paul",
|
| 72 |
"ja": "Ja Morant",
|
| 73 |
"trae": "Trae Young",
|
|
@@ -85,7 +64,6 @@ NICKNAMES = {
|
|
| 85 |
"harden": "James Harden",
|
| 86 |
"westbrook": "Russell Westbrook",
|
| 87 |
"sabonis": "Domantas Sabonis",
|
| 88 |
-
"fox": "De'Aaron Fox",
|
| 89 |
"mobley": "Evan Mobley",
|
| 90 |
"cade": "Cade Cunningham",
|
| 91 |
"paolo": "Paolo Banchero",
|
|
@@ -93,57 +71,55 @@ NICKNAMES = {
|
|
| 93 |
"scottie": "Scottie Barnes",
|
| 94 |
"brunson": "Jalen Brunson",
|
| 95 |
"flagg": "Cooper Flagg",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
}
|
| 97 |
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
def find_player_mentions(text):
|
| 100 |
-
"""
|
| 101 |
-
Find all NBA player mentions in text.
|
| 102 |
-
|
| 103 |
-
Returns:
|
| 104 |
-
dict: {canonical_name: [list of matched strings]}
|
| 105 |
-
"""
|
| 106 |
if not text:
|
| 107 |
return {}
|
| 108 |
|
| 109 |
-
text_lower = text.lower()
|
| 110 |
found = {}
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
# Check nicknames
|
| 113 |
for nick, player in NICKNAMES.items():
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
if player not in found:
|
| 117 |
-
found[player] = []
|
| 118 |
-
found[player].append(nick)
|
| 119 |
-
|
| 120 |
-
# Check full names
|
| 121 |
-
for player in NBA_PLAYERS:
|
| 122 |
-
pattern = r'\b' + re.escape(player.lower()) + r'\b'
|
| 123 |
-
if re.search(pattern, text_lower):
|
| 124 |
-
if player not in found:
|
| 125 |
-
found[player] = []
|
| 126 |
-
found[player].append(player)
|
| 127 |
|
| 128 |
-
# Check
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
matches = LAST_NAMES[word]
|
| 138 |
-
if len(matches) == 1:
|
| 139 |
-
player = matches[0]
|
| 140 |
-
if player not in found:
|
| 141 |
-
found[player] = []
|
| 142 |
-
found[player].append(word)
|
| 143 |
|
| 144 |
-
return found
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
def get_mentioned_players(text):
    """Return the canonical names of the players mentioned in *text*.

    Thin convenience wrapper: it calls ``find_player_mentions`` and keeps
    only the keys of the returned mapping, as a list.
    """
    mentions = find_player_mentions(text)
    return list(mentions)
|
|
|
|
| 1 |
+
"""Fast Player Name Matching - Optimized for speed"""
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import re
|
| 4 |
from nba_players import NBA_PLAYERS
|
| 5 |
|
| 6 |
+
# Build lookup structures once at import time.

# Lower-cased full names, for O(1) membership tests.
FULL_NAMES = {p.lower() for p in NBA_PLAYERS}

# Kept as a module-level name; nothing in this section populates it.
FIRST_NAMES = {}

# Lower-cased last name -> list of every player that carries it.
LAST_NAMES = {}

# Generational suffixes that must not be mistaken for a last name.
# NOTE(review): presumably NBA_PLAYERS holds display names such as
# "First Last Jr." -- confirm against the nba_players module.
_NAME_SUFFIXES = frozenset({"jr", "sr", "ii", "iii", "iv"})

for player in NBA_PLAYERS:
    parts = player.split()
    # "First Last Jr." -> index under "last", not "jr.".  Only strip when a
    # first and last name still remain afterwards.
    if len(parts) > 2 and parts[-1].lower().rstrip(".") in _NAME_SUFFIXES:
        parts = parts[:-1]
    # Single-word names have no separate last name to index.
    if len(parts) >= 2:
        LAST_NAMES.setdefault(parts[-1].lower(), []).append(player)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Common words that should NOT match even if they're unique last names
EXCLUDED_WORDS = {
    "post", "love", "smart", "holiday", "green", "brown", "white", "black",
    "gray", "king", "young", "long", "little", "best", "hart", "wade",
    "rose", "bridges", "wall", "hill", "rice", "prince", "mann", "day",
    "wells", "reed", "waters", "brooks", "banks", "bell", "ford", "lane",
    "grant", "stone", "wood", "wolf", "fox", "dean", "ross", "terry",
    "craig", "cole", "huff", "cook", "monk", "dunn", "bates", "temple",
    "joe", "ben", "sam", "tim", "dan", "tom", "john", "will", "max",
    "cam", "tre", "pat", "drew", "alex", "nick", "gary", "josh", "mike",
    "chris", "kyle", "ryan", "tyler", "jordan", "isaiah", "marcus", "jalen",
    "boston", "houston", "phoenix", "orlando", "washington", "cleveland",
}

# Unique last names: last name -> the single player that has it, skipping
# any last name listed in EXCLUDED_WORDS.  Filtering inside the comprehension
# avoids the build-then-pop second pass and keeps no stray loop variable at
# module level.
UNIQUE_LAST_NAMES = {
    last_name: players[0]
    for last_name, players in LAST_NAMES.items()
    if len(players) == 1 and last_name not in EXCLUDED_WORDS
}
|
| 39 |
+
|
| 40 |
+
# Nicknames mapping
|
| 41 |
NICKNAMES = {
|
| 42 |
"lebron": "LeBron James",
|
| 43 |
"steph": "Stephen Curry",
|
|
|
|
| 47 |
"embiid": "Joel Embiid",
|
| 48 |
"kd": "Kevin Durant",
|
| 49 |
"ad": "Anthony Davis",
|
|
|
|
| 50 |
"cp3": "Chris Paul",
|
| 51 |
"ja": "Ja Morant",
|
| 52 |
"trae": "Trae Young",
|
|
|
|
| 64 |
"harden": "James Harden",
|
| 65 |
"westbrook": "Russell Westbrook",
|
| 66 |
"sabonis": "Domantas Sabonis",
|
|
|
|
| 67 |
"mobley": "Evan Mobley",
|
| 68 |
"cade": "Cade Cunningham",
|
| 69 |
"paolo": "Paolo Banchero",
|
|
|
|
| 71 |
"scottie": "Scottie Barnes",
|
| 72 |
"brunson": "Jalen Brunson",
|
| 73 |
"flagg": "Cooper Flagg",
|
| 74 |
+
"maxey": "Tyrese Maxey",
|
| 75 |
+
"halliburton": "Tyrese Haliburton",
|
| 76 |
+
"haliburton": "Tyrese Haliburton",
|
| 77 |
+
"garland": "Darius Garland",
|
| 78 |
+
"murray": "Jamal Murray",
|
| 79 |
+
"dejounte": "Dejounte Murray",
|
| 80 |
+
"bam": "Bam Adebayo",
|
| 81 |
+
"herro": "Tyler Herro",
|
| 82 |
+
"jimmy": "Jimmy Butler",
|
| 83 |
+
"kawhi": "Kawhi Leonard",
|
| 84 |
+
"pg13": "Paul George",
|
| 85 |
}
|
| 86 |
|
| 87 |
+
# Precompile word pattern
WORD_PATTERN = re.compile(r"[a-zA-Z']+")

# Tokenised lower-case full name -> canonical player name, built once at
# import time.  Names are split with WORD_PATTERN, the same tokenizer applied
# to the input text, so a name that splits into more than two tokens (a
# hyphen or a trailing suffix, for instance) is still found as a run of
# consecutive words.  setdefault keeps the first player in NBA_PLAYERS order
# when two names tokenise identically.
_NAME_TOKENS = {}
for _name in NBA_PLAYERS:
    _tokens = tuple(WORD_PATTERN.findall(_name.lower()))
    if len(_tokens) >= 2:
        _NAME_TOKENS.setdefault(_tokens, _name)

# Distinct token counts that occur among the indexed names (usually just a
# handful), so the scan below only builds windows that can possibly match.
_NAME_LENGTHS = sorted({len(_t) for _t in _NAME_TOKENS})


def find_player_mentions(text):
    """Find player mentions in *text*.

    Matching is case-insensitive and runs in three passes over the words
    extracted by ``WORD_PATTERN``:

    1. nicknames listed in ``NICKNAMES``;
    2. full names, as runs of consecutive words;
    3. last names listed in ``UNIQUE_LAST_NAMES``, only for players not
       already found by the first two passes.

    Args:
        text: The text to scan. Falsy input (``None``, ``""``) yields ``{}``.

    Returns:
        dict: ``{canonical_name: [matched lower-case strings]}``.
    """
    if not text:
        return {}

    found = {}
    words = WORD_PATTERN.findall(text.lower())
    word_set = set(words)

    # Pass 1: nicknames.  Append rather than assign, so two nicknames that
    # map to the same player are both recorded.
    for nick, player in NICKNAMES.items():
        if nick in word_set:
            found.setdefault(player, []).append(nick)

    # Pass 2: full names.  Slide a window of every indexed name length over
    # the words, in text order; each hit is a single dict lookup.
    total = len(words)
    for start in range(total):
        for size in _NAME_LENGTHS:
            if start + size > total:
                break  # lengths are ascending; longer windows cannot fit
            window = words[start:start + size]
            player = _NAME_TOKENS.get(tuple(window))
            if player is not None:
                found.setdefault(player, []).append(" ".join(window))

    # Pass 3: unique last names.  Walk the words in text order (not the set)
    # so the key order of the result is the same on every run.
    for word in words:
        player = UNIQUE_LAST_NAMES.get(word)
        if player is not None and player not in found:
            found[player] = [word]

    return found
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|