Spaces:
Sleeping
Sleeping
fix: Clean company name suffixes in stock listings
Browse files
- Strip "- Common Stock", "- Class A/B/C", etc. when parsing NASDAQ files
- Refreshed cache with 5274 stocks with clean names
- Prevents polluted search queries downstream
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- data/cache/us_stocks.json +0 -0
- src/stock_listings.py +22 -1
data/cache/us_stocks.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/stock_listings.py
CHANGED
|
@@ -48,6 +48,27 @@ EXCLUDED_PATTERNS = [
|
|
| 48 |
r'DEPOSITARY', r'ADR$', r'ADS$',
|
| 49 |
]
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def _is_common_stock(name: str, symbol: str) -> bool:
|
| 53 |
"""Filter to include only common stocks, exclude ETFs/funds/etc."""
|
|
@@ -128,7 +149,7 @@ def _parse_nasdaq_file(content: str, exchange: str) -> List[dict]:
|
|
| 128 |
|
| 129 |
stocks.append({
|
| 130 |
"symbol": symbol,
|
| 131 |
-
"name": name,
|
| 132 |
"exchange": exchange
|
| 133 |
})
|
| 134 |
|
|
|
|
| 48 |
r'DEPOSITARY', r'ADR$', r'ADS$',
|
| 49 |
]
|
| 50 |
|
| 51 |
+
# Suffixes to strip from company names for cleaner display.
# Matching is longest-suffix-wins, so the order of the entries is not
# significant and new entries can be appended anywhere.
NAME_SUFFIXES_TO_STRIP = [
    " - Common Stock",
    " - Class A Common Stock",
    " - Class B Common Stock",
    " - Class C Common Stock",
    " - Ordinary Shares",
    " - Class A Ordinary Shares",
    " - Class B Ordinary Shares",
    " Common Stock",
    " Ordinary Shares",
]


def _clean_company_name(name: str) -> str:
    """Strip security-type suffixes from a company name for cleaner display.

    The longest entry of ``NAME_SUFFIXES_TO_STRIP`` that *name* ends with is
    removed. If only a generic suffix matched and the remainder still ends in
    a share-class marker (``" - Class X"`` with a single upper-case letter),
    that marker is removed as well, so share classes that are not listed
    explicitly do not leave a dangling fragment behind.

    Args:
        name: Security name as it appears in the listing file.

    Returns:
        The cleaned name. *name* is returned unchanged when no suffix matches
        or when stripping would leave an empty string.

    >>> _clean_company_name("Apple Inc. - Common Stock")
    'Apple Inc.'
    >>> _clean_company_name("Foo Ltd. - Class C Ordinary Shares")
    'Foo Ltd.'
    >>> _clean_company_name("Plain Name Corp")
    'Plain Name Corp'
    """
    # Longest match wins so a generic entry (" Common Stock") can never
    # shadow a more specific one (" - Common Stock"), whatever the list order.
    suffix = max(
        (s for s in NAME_SUFFIXES_TO_STRIP if name.endswith(s)),
        key=len,
        default=None,
    )
    if suffix is None:
        return name

    cleaned = name[:-len(suffix)].rstrip()

    # A generic suffix may have matched a class variant that is not listed
    # (e.g. " - Class D Common Stock"); drop the leftover " - Class D".
    base, marker, share_class = cleaned.rpartition(" - Class ")
    if (
        marker
        and len(share_class) == 1
        and share_class.isalpha()
        and share_class.isupper()
    ):
        cleaned = base.rstrip()

    # Never hand back an empty display name; keep the raw value instead.
    return cleaned or name
|
| 71 |
+
|
| 72 |
|
| 73 |
def _is_common_stock(name: str, symbol: str) -> bool:
|
| 74 |
"""Filter to include only common stocks, exclude ETFs/funds/etc."""
|
|
|
|
| 149 |
|
| 150 |
stocks.append({
|
| 151 |
"symbol": symbol,
|
| 152 |
+
"name": _clean_company_name(name),
|
| 153 |
"exchange": exchange
|
| 154 |
})
|
| 155 |
|