AJAY KASU committed on
Commit
9060f20
·
1 Parent(s): 4aac777

Perf: Filter universe BEFORE fetching data to prevent timeout

Browse files
Files changed (1) hide show
  1. main.py +30 -24
main.py CHANGED
@@ -29,12 +29,34 @@ class QuantScaleSystem:
29
 
30
  # 1. Fetch Universe (S&P 500)
31
  tickers = self.data_engine.fetch_sp500_tickers()
32
- # Limit for demo speed if needed, but let's try full
33
- # tickers = tickers[:50]
 
 
34
 
35
- # 2. Get Market Data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # Fetch last 2 years for covariance
37
- data = self.data_engine.fetch_market_data(tickers, start_date="2023-01-01")
38
  if data.empty:
39
  logger.error("No market data available. Aborting.")
40
  return None
@@ -45,27 +67,11 @@ class QuantScaleSystem:
45
  # Ensure we align returns and tickers
46
  valid_tickers = returns.columns.tolist()
47
 
48
- # APPLY FILTERING STRATEGY (New)
49
  if request.strategy and request.top_n:
50
- logger.info(f"Applying Strategy: {request.strategy} with Top N={request.top_n}")
51
- caps = self.data_engine.fetch_market_caps(valid_tickers)
52
-
53
- # Sort valid_tickers by cap
54
- # Filter out 0 caps (failed fetches)
55
- valid_caps = {t: c for t, c in caps.items() if c > 0}
56
- sorted_tickers = sorted(valid_caps.keys(), key=lambda t: valid_caps[t])
57
-
58
- if request.strategy == "smallest_market_cap":
59
- valid_tickers = sorted_tickers[:request.top_n]
60
- logger.info(f"Filtered to Smallest {request.top_n}: {valid_tickers[:5]}...")
61
-
62
- elif request.strategy == "largest_market_cap":
63
- valid_tickers = sorted_tickers[-request.top_n:]
64
- logger.info(f"Filtered to Largest {request.top_n}: {valid_tickers[:5]}...")
65
-
66
- # Re-fetch returns for just these? No, we already have `returns` DF.
67
- # Just slice the DF to save computation in Risk Model
68
- returns = returns[valid_tickers]
69
 
70
  cov_matrix = self.risk_model.compute_covariance_matrix(returns)
71
 
 
29
 
30
  # 1. Fetch Universe (S&P 500)
31
  tickers = self.data_engine.fetch_sp500_tickers()
32
+
33
+ # OPTIMIZATION: Filter Universe BEFORE Fetching Data
34
+ # Fetching all ~500 tickers takes too long on free-tier Spaces and causes a timeout.
35
+ valid_tickers_for_fetch = tickers
36
 
37
+ if request.strategy and request.top_n:
38
+ logger.info(f"Applying Strategy PRE-FETCH: {request.strategy} with Top N={request.top_n}")
39
+ caps = self.data_engine.fetch_market_caps(tickers)
40
+
41
+ # Sort valid_tickers by cap
42
+ valid_caps = {t: c for t, c in caps.items() if c > 0}
43
+ sorted_tickers = sorted(valid_caps.keys(), key=lambda t: valid_caps[t])
44
+
45
+ if request.strategy == "smallest_market_cap":
46
+ valid_tickers_for_fetch = sorted_tickers[:request.top_n]
47
+ logger.info(f"Filtered to Smallest {request.top_n} for Fetching: {valid_tickers_for_fetch[:5]}...")
48
+
49
+ elif request.strategy == "largest_market_cap":
50
+ valid_tickers_for_fetch = sorted_tickers[-request.top_n:]
51
+ logger.info(f"Filtered to Largest {request.top_n} for Fetching: {valid_tickers_for_fetch[:5]}...")
52
+ else:
53
+ # Default safety limit for Demo if no strategy
54
+ valid_tickers_for_fetch = tickers[:60]
55
+ logger.warning("No strategy specified. Defaulting to first 60 tickers for Demo Speed.")
56
+
57
+ # 2. Get Market Data (Only for filtered subset)
58
  # Fetch last 2 years for covariance
59
+ data = self.data_engine.fetch_market_data(valid_tickers_for_fetch, start_date="2023-01-01")
60
  if data.empty:
61
  logger.error("No market data available. Aborting.")
62
  return None
 
67
  # Ensure we align returns and tickers
68
  valid_tickers = returns.columns.tolist()
69
 
70
+ # Re-verify filter (data fetch might have dropped some)
71
  if request.strategy and request.top_n:
72
+ # Re-sort based on what we actually have?
73
+ # Or just proceed, since we pre-filtered.
74
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  cov_matrix = self.risk_model.compute_covariance_matrix(returns)
77