cdechoch committed on
Commit
ce0fc8a
·
verified ·
1 Parent(s): 4f954e0

Update player_matcher.py

Browse files
Files changed (1) hide show
  1. player_matcher.py +66 -90
player_matcher.py CHANGED
@@ -1,63 +1,43 @@
1
- """
2
- Player Name Matching Engine
3
- Scans text for NBA player mentions and returns matched players.
4
- """
5
 
6
  import re
7
  from nba_players import NBA_PLAYERS
8
 
9
# Lookup tables derived from the roster, filled in a single pass below.
PLAYER_PATTERNS = {}  # lowercase full name -> canonical name
FIRST_NAMES = {}      # lowercase first token -> list of canonical names
LAST_NAMES = {}       # lowercase last token -> list of canonical names

for player in NBA_PLAYERS:
    parts = player.split()
    if len(parts) < 2:
        # Single-token entries are not indexed at all.
        continue

    first = parts[0]
    last = parts[-1]

    # Full-name lookup.
    PLAYER_PATTERNS[player.lower()] = player

    # Partial-name lookups; several players may share a first or last token.
    FIRST_NAMES.setdefault(first.lower(), []).append(player)
    LAST_NAMES.setdefault(last.lower(), []).append(player)
31
 
32
# Names too common to be matched on their own; a mention only counts when it
# is confirmed some other way (full name or nickname). Repeated entries are
# harmless because the result is a set.
AMBIGUOUS_NAMES = set(
    (
        # Last names
        "james johnson williams brown jones davis smith "
        "thompson jackson white harris martin moore taylor "
        "thomas clark robinson walker green hall allen "
        "young king wright hill scott adams baker nelson "
        "mitchell carter roberts turner phillips campbell "
        "parker evans edwards collins stewart morris murphy "
        "gordon murray barnes curry ball wagner miller "
        "porter grant reed powell butler jordan paul "
        "hunter love smart holiday mann wade monk dunn "
        "nance cole reed huff temple bates cook bell "
        # Common first names
        "will aaron chris kevin gary nick cole josh "
        "mike anthony cooper tre cam max pat drew "
        "jordan isaiah marcus jalen tyler ryan kyle "
        "joe ben sam tim dan tom john david alex "
        "james robert michael william richard charles "
        "daniel matthew andrew joshua christopher joseph "
        # Everyday words and place names that double as player names
        "post bridges rose wolf wells rice prince "
        "little green brown white black gray best "
        "freeman washington boston houston phoenix orlando "
        "brooks rivers banks stone wood waters field "
        "hart fox dean terry craig ross lane day"
    ).split()
)
59
 
60
- # Famous nicknames -> canonical names
 
 
 
 
61
  NICKNAMES = {
62
  "lebron": "LeBron James",
63
  "steph": "Stephen Curry",
@@ -67,7 +47,6 @@ NICKNAMES = {
67
  "embiid": "Joel Embiid",
68
  "kd": "Kevin Durant",
69
  "ad": "Anthony Davis",
70
- "pg": "Paul George",
71
  "cp3": "Chris Paul",
72
  "ja": "Ja Morant",
73
  "trae": "Trae Young",
@@ -85,7 +64,6 @@ NICKNAMES = {
85
  "harden": "James Harden",
86
  "westbrook": "Russell Westbrook",
87
  "sabonis": "Domantas Sabonis",
88
- "fox": "De'Aaron Fox",
89
  "mobley": "Evan Mobley",
90
  "cade": "Cade Cunningham",
91
  "paolo": "Paolo Banchero",
@@ -93,57 +71,55 @@ NICKNAMES = {
93
  "scottie": "Scottie Barnes",
94
  "brunson": "Jalen Brunson",
95
  "flagg": "Cooper Flagg",
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
 
 
 
 
98
 
def find_player_mentions(text):
    """
    Locate every NBA player mentioned in ``text``.

    Matching is case-insensitive and runs in three passes: nicknames,
    full names, then last names that belong to exactly one player and are
    not listed in AMBIGUOUS_NAMES.

    Returns:
        dict: {canonical_name: [list of matched strings]}; empty when
        ``text`` is falsy.
    """
    if not text:
        return {}

    lowered = text.lower()
    mentions = {}

    # Pass 1: nicknames, matched on word boundaries.
    for alias, canonical in NICKNAMES.items():
        if re.search(r'\b' + re.escape(alias) + r'\b', lowered):
            mentions.setdefault(canonical, []).append(alias)

    # Pass 2: full names, matched on word boundaries.
    for canonical in NBA_PLAYERS:
        if re.search(r'\b' + re.escape(canonical.lower()) + r'\b', lowered):
            mentions.setdefault(canonical, []).append(canonical)

    # Pass 3: single words that are an unambiguous, unique last name.
    tokens = set(re.findall(r'\b[a-zA-Z]+\b', lowered))
    for token in tokens:
        if token in AMBIGUOUS_NAMES:
            continue
        candidates = LAST_NAMES.get(token)
        if candidates is not None and len(candidates) == 1:
            mentions.setdefault(candidates[0], []).append(token)

    return mentions
145
-
146
-
147
def get_mentioned_players(text):
    """Return only the canonical names of the players mentioned in ``text``."""
    mentions = find_player_mentions(text)
    return list(mentions)
 
1
+ """Fast Player Name Matching - Optimized for speed"""
 
 
 
2
 
3
  import re
4
  from nba_players import NBA_PLAYERS
5
 
6
# Lookup structures, computed a single time when the module is imported.
FULL_NAMES = {p.lower() for p in NBA_PLAYERS}
FIRST_NAMES = {}  # declared here; the loop below does not populate it
LAST_NAMES = {}   # lowercase last token -> list of canonical names

for player in NBA_PLAYERS:
    parts = player.split()
    if len(parts) < 2:
        # Single-token entries have no separate last name to index.
        continue
    first = parts[0].lower()
    # NOTE: the "last name" is simply the final whitespace-separated token,
    # so an entry ending in a suffix is indexed under that suffix.
    last = parts[-1].lower()
    LAST_NAMES.setdefault(last, []).append(player)
 
 
 
 
 
 
 
 
 
 
18
 
19
# Words that must never be treated as a last-name mention, even when exactly
# one player carries that last name.
EXCLUDED_WORDS = {
    # Everyday words and very common surnames
    "post", "love", "smart", "holiday", "green", "brown", "white", "black",
    "gray", "king", "young", "long", "little", "best", "hart", "wade",
    "rose", "bridges", "wall", "hill", "rice", "prince", "mann", "day",
    "wells", "reed", "waters", "brooks", "banks", "bell", "ford", "lane",
    "grant", "stone", "wood", "wolf", "fox", "dean", "ross", "terry",
    "craig", "cole", "huff", "cook", "monk", "dunn", "bates", "temple",
    # Common first names
    "joe", "ben", "sam", "tim", "dan", "tom", "john", "will", "max",
    "cam", "tre", "pat", "drew", "alex", "nick", "gary", "josh", "mike",
    "chris", "kyle", "ryan", "tyler", "jordan", "isaiah", "marcus", "jalen",
    # City names
    "boston", "houston", "phoenix", "orlando", "washington", "cleveland",
}

# Last name -> the single player who has it, with excluded words filtered out
# while the mapping is built.
UNIQUE_LAST_NAMES = {
    last_name: players[0]
    for last_name, players in LAST_NAMES.items()
    if len(players) == 1 and last_name not in EXCLUDED_WORDS
}
39
+
40
+ # Nicknames mapping
41
  NICKNAMES = {
42
  "lebron": "LeBron James",
43
  "steph": "Stephen Curry",
 
47
  "embiid": "Joel Embiid",
48
  "kd": "Kevin Durant",
49
  "ad": "Anthony Davis",
 
50
  "cp3": "Chris Paul",
51
  "ja": "Ja Morant",
52
  "trae": "Trae Young",
 
64
  "harden": "James Harden",
65
  "westbrook": "Russell Westbrook",
66
  "sabonis": "Domantas Sabonis",
 
67
  "mobley": "Evan Mobley",
68
  "cade": "Cade Cunningham",
69
  "paolo": "Paolo Banchero",
 
71
  "scottie": "Scottie Barnes",
72
  "brunson": "Jalen Brunson",
73
  "flagg": "Cooper Flagg",
74
+ "maxey": "Tyrese Maxey",
75
+ "halliburton": "Tyrese Haliburton",
76
+ "haliburton": "Tyrese Haliburton",
77
+ "garland": "Darius Garland",
78
+ "murray": "Jamal Murray",
79
+ "dejounte": "Dejounte Murray",
80
+ "bam": "Bam Adebayo",
81
+ "herro": "Tyler Herro",
82
+ "jimmy": "Jimmy Butler",
83
+ "kawhi": "Kawhi Leonard",
84
+ "pg13": "Paul George",
85
  }
86
 
87
# Precompiled token pattern: runs of ASCII letters and apostrophes, so names
# such as "De'Aaron" stay in one piece.
WORD_PATTERN = re.compile(r"[a-zA-Z']+")


def _tokenize(text):
    """Split ``text`` into lowercase tokens comparable with the lookup keys.

    Typographic apostrophes are folded to ASCII, surrounding quote marks are
    stripped and a trailing possessive ``'s`` is removed, so "LeBron's" and
    "'Curry'" yield "lebron" and "curry". Apostrophes inside a word are kept.
    """
    tokens = []
    for raw in WORD_PATTERN.findall(text.lower().replace("\u2019", "'")):
        token = raw.strip("'")
        if token.endswith("'s"):
            token = token[:-2]
        if token:
            tokens.append(token)
    return tokens


# Canonical name keyed by the token tuple of the full name. Tokenising the
# roster with the same helper as the input text lets hyphenated or suffixed
# names (three or more tokens) match, and replaces the per-hit linear scan of
# NBA_PLAYERS with one dict lookup. On a key collision the first roster entry
# wins, as it did with the original scan.
_PLAYERS_BY_TOKENS = {}
for _name in NBA_PLAYERS:
    _key = tuple(_tokenize(_name))
    if _key:
        _PLAYERS_BY_TOKENS.setdefault(_key, _name)

# Distinct token counts among full names; only these window sizes are scanned.
_NAME_TOKEN_COUNTS = sorted({len(key) for key in _PLAYERS_BY_TOKENS})


def find_player_mentions(text):
    """Find NBA player mentions in ``text``.

    Matching is case-insensitive and runs in three passes:

    1. nicknames from NICKNAMES,
    2. full names from NBA_PLAYERS (any number of tokens),
    3. last names held by exactly one player (UNIQUE_LAST_NAMES), used only
       for players not already found by an earlier pass.

    Args:
        text: The text to scan. Falsy values (``None``, ``""``) give ``{}``.

    Returns:
        dict: {canonical_name: [matched strings]}. Matched strings are
        lowercase; a full-name match is reported as its tokens joined by
        single spaces.
    """
    if not text:
        return {}

    found = {}
    text_lower = text.lower()
    words = _tokenize(text)
    word_set = set(words)

    # Pass 1: nicknames. Purely alphabetic nicknames are looked up in the
    # token set. Nicknames containing other characters (e.g. digits, as in
    # "cp3") can never be produced by WORD_PATTERN, so they are searched for
    # in the lowercased text on word boundaries instead.
    for nick, player in NICKNAMES.items():
        if nick in word_set:
            matched = True
        elif nick.isalpha():
            matched = False
        else:
            matched = re.search(r"\b" + re.escape(nick) + r"\b", text_lower) is not None
        if matched:
            # Append rather than assign so a second nickname for the same
            # player does not discard the first.
            found.setdefault(player, []).append(nick)

    # Pass 2: full names, via sliding windows of each relevant size.
    for size in _NAME_TOKEN_COUNTS:
        for start in range(len(words) - size + 1):
            window = tuple(words[start:start + size])
            player = _PLAYERS_BY_TOKENS.get(window)
            if player is not None:
                found.setdefault(player, []).append(" ".join(window))

    # Pass 3: unique last names. Iterate tokens in first-seen order (not the
    # set) so the key order of the result is deterministic.
    for word in dict.fromkeys(words):
        player = UNIQUE_LAST_NAMES.get(word)
        if player is not None and player not in found:
            found[player] = [word]

    return found