AJAY KASU committed on
Commit
b14afd7
·
1 Parent(s): be4d472

Fix: Normalize sector names to resolve Healthcare exclusion leakage

Browse files
Files changed (1) hide show
  1. data/optimizer.py +8 -1
data/optimizer.py CHANGED
@@ -74,10 +74,17 @@ class PortfolioOptimizer:
74
  logger.info(f"Applying Sector Exclusion Validation for: {excluded_sectors}")
75
  for i, ticker in enumerate(tickers):
76
  sector = sector_map.get(ticker, "Unknown")
 
 
 
77
  for excl in excluded_sectors:
78
- if excl.lower() == sector.lower() or (excl == "Technology" and sector == "Information Technology"):
 
 
 
79
  excluded_indices.append(i)
80
  mask_vector[i] = 1
 
81
 
82
  # Ticker Exclusions (NEW)
83
  if excluded_tickers:
 
74
  logger.info(f"Applying Sector Exclusion Validation for: {excluded_sectors}")
75
  for i, ticker in enumerate(tickers):
76
  sector = sector_map.get(ticker, "Unknown")
77
+ # Normalize both for robust matching (e.g., "Health Care" vs "Healthcare")
78
+ sector_norm = sector.lower().replace(" ", "").replace("-", "")
79
+
80
  for excl in excluded_sectors:
81
+ excl_norm = excl.lower().replace(" ", "").replace("-", "")
82
+
83
+ # Match if normalized strings are equal OR special mapping for Tech
84
+ if excl_norm == sector_norm or (excl_norm == "tech" and sector_norm == "informationtechnology"):
85
  excluded_indices.append(i)
86
  mask_vector[i] = 1
87
+ break
88
 
89
  # Ticker Exclusions (NEW)
90
  if excluded_tickers: