Spaces:
Running
Running
AJAY KASU commited on
Commit ·
80482cc
1
Parent(s): 7b30313
Fix: Realistic Benchmark construction and Passive Mode AI gate
Browse files- ai/prompts.py +11 -0
- main.py +59 -54
ai/prompts.py
CHANGED
|
@@ -51,3 +51,14 @@ Write a "Trailing 30-Day Risk & Performance Attribution" report relative to the
|
|
| 51 |
|
| 52 |
Write a professional, concise 3-paragraph commentary.
|
| 53 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
Write a professional, concise 3-paragraph commentary.
|
| 53 |
"""
|
| 54 |
+
|
| 55 |
+
# Passive Mode Template (The "Session Integrity Check")
|
| 56 |
+
PASSIVE_NARRATIVE_TEMPLATE = """
|
| 57 |
+
The portfolio is in a Full Replication state (Tracking Error ≈ 0.00%).
|
| 58 |
+
|
| 59 |
+
Confirm that active return and tracking error are strictly negligible.
|
| 60 |
+
|
| 61 |
+
Do NOT use the words 'overweight', 'underweight', 'contributor', or 'detractor'.
|
| 62 |
+
|
| 63 |
+
State that the portfolio performance is driven entirely by Market Beta and matches the benchmark return exactly.
|
| 64 |
+
"""
|
main.py
CHANGED
|
@@ -31,29 +31,34 @@ class QuantScaleSystem:
|
|
| 31 |
tickers = self.data_engine.fetch_sp500_tickers()
|
| 32 |
|
| 33 |
# OPTIMIZATION: Filter Universe BEFORE Fetching Data
|
| 34 |
-
#
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
if request.strategy and request.top_n:
|
| 38 |
logger.info(f"Applying Strategy PRE-FETCH: {request.strategy} with Top N={request.top_n}")
|
| 39 |
-
caps = self.data_engine.fetch_market_caps(tickers)
|
| 40 |
-
|
| 41 |
-
# Sort valid_tickers by cap
|
| 42 |
-
valid_caps = {t: c for t, c in caps.items() if c > 0}
|
| 43 |
-
sorted_tickers = sorted(valid_caps.keys(), key=lambda t: valid_caps[t])
|
| 44 |
|
| 45 |
if request.strategy == "smallest_market_cap":
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
|
| 49 |
elif request.strategy == "largest_market_cap":
|
| 50 |
-
|
| 51 |
-
|
| 52 |
else:
|
| 53 |
-
# Default safety limit for Demo
|
| 54 |
valid_tickers_for_fetch = tickers[:60]
|
| 55 |
-
|
| 56 |
-
|
| 57 |
# 2. Get Market Data (Only for filtered subset)
|
| 58 |
# Fetch last 2 years for covariance
|
| 59 |
data = self.data_engine.fetch_market_data(valid_tickers_for_fetch, start_date="2023-01-01")
|
|
@@ -75,62 +80,62 @@ class QuantScaleSystem:
|
|
| 75 |
|
| 76 |
cov_matrix = self.risk_model.compute_covariance_matrix(returns)
|
| 77 |
|
| 78 |
-
# 4. Get Benchmark Data (S&P 500)
|
| 79 |
-
#
|
| 80 |
-
# Simplification: Assume Market Cap weights or Equal weights for the benchmark
|
| 81 |
-
# since getting live weights is hard without expensive data.
|
| 82 |
-
# We will assume Equal Weights for the Benchmark in this demo logic
|
| 83 |
-
# or use a proxy.
|
| 84 |
-
# BETTER: Use SPY returns as the benchmark returns series for optimization.
|
| 85 |
-
|
| 86 |
-
# For the optimizer, we need "Benchmark Weights" if we want to minimize active weight variance.
|
| 87 |
-
# If we just map to S&P 500, let's assume valid_tickers ARE the index.
|
| 88 |
-
# 4. Get Benchmark Data (S&P 500)
|
| 89 |
-
# Fetch benchmark to calculate weights used for Tracking Error
|
| 90 |
-
# REALISTIC PROXY: S&P 500 is Market Cap Weighted.
|
| 91 |
-
# We manually assign Top 10 weights to make Tracking Error realistic when checking exclusions.
|
| 92 |
|
| 93 |
n_assets = len(valid_tickers)
|
| 94 |
benchmark_weights = pd.Series(0.0, index=valid_tickers)
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
#
|
| 98 |
-
|
| 99 |
-
"MSFT": 0.070, "AAPL": 0.065, "NVDA": 0.060,
|
| 100 |
-
"AMZN": 0.035, "GOOGL": 0.020, "GOOG": 0.020,
|
| 101 |
-
"META": 0.020, "TSLA": 0.015, "BRK-B": 0.015,
|
| 102 |
-
"LLY": 0.012, "AVGO": 0.012, "JPM": 0.010
|
| 103 |
-
}
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
benchmark_weights[t] = w
|
| 109 |
-
current_total += w
|
| 110 |
-
|
| 111 |
-
# Distribute remaining weight equally among rest
|
| 112 |
-
remaining_weight = 1.0 - current_total
|
| 113 |
-
remaining_count = n_assets - len([t for t in top_weights if t in valid_tickers])
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
#
|
| 122 |
-
|
|
|
|
|
|
|
| 123 |
|
|
|
|
|
|
|
|
|
|
| 124 |
# 5. Optimize Portfolio
|
| 125 |
sector_map = self.data_engine.get_sector_map()
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
opt_result = self.optimizer.optimize_portfolio(
|
| 128 |
covariance_matrix=cov_matrix,
|
| 129 |
tickers=valid_tickers,
|
| 130 |
benchmark_weights=benchmark_weights,
|
| 131 |
sector_map=sector_map,
|
| 132 |
excluded_sectors=request.excluded_sectors,
|
| 133 |
-
excluded_tickers=
|
| 134 |
max_weight=request.max_weight
|
| 135 |
)
|
| 136 |
|
|
|
|
| 31 |
tickers = self.data_engine.fetch_sp500_tickers()
|
| 32 |
|
| 33 |
# OPTIMIZATION: Filter Universe BEFORE Fetching Data
|
| 34 |
+
# But we MUST fetch "Market Drivers" to define a realistic Benchmark
|
| 35 |
+
# Otherwise TE is 0.0 because Benchmark == Portfolio Universe
|
| 36 |
+
|
| 37 |
+
caps = self.data_engine.fetch_market_caps(tickers)
|
| 38 |
+
valid_caps = {t: c for t, c in caps.items() if c > 0}
|
| 39 |
+
sorted_by_cap = sorted(valid_caps.keys(), key=lambda t: valid_caps[t])
|
| 40 |
+
|
| 41 |
+
# Define "Market Drivers" (Top 20) - Essential for S&P 500 Proxy
|
| 42 |
+
market_drivers = sorted_by_cap[-20:]
|
| 43 |
+
|
| 44 |
+
valid_tickers_for_fetch = []
|
| 45 |
|
| 46 |
if request.strategy and request.top_n:
|
| 47 |
logger.info(f"Applying Strategy PRE-FETCH: {request.strategy} with Top N={request.top_n}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
if request.strategy == "smallest_market_cap":
|
| 50 |
+
targets = sorted_by_cap[:request.top_n]
|
| 51 |
+
# We fetch Targets + Drivers
|
| 52 |
+
valid_tickers_for_fetch = list(set(targets + market_drivers))
|
| 53 |
+
logger.info(f"Fetching {len(valid_tickers_for_fetch)} tickers (Targets + Drivers)")
|
| 54 |
|
| 55 |
elif request.strategy == "largest_market_cap":
|
| 56 |
+
targets = sorted_by_cap[-request.top_n:]
|
| 57 |
+
valid_tickers_for_fetch = list(set(targets + market_drivers))
|
| 58 |
else:
|
| 59 |
+
# Default safety limit for Demo
|
| 60 |
valid_tickers_for_fetch = tickers[:60]
|
| 61 |
+
|
|
|
|
| 62 |
# 2. Get Market Data (Only for filtered subset)
|
| 63 |
# Fetch last 2 years for covariance
|
| 64 |
data = self.data_engine.fetch_market_data(valid_tickers_for_fetch, start_date="2023-01-01")
|
|
|
|
| 80 |
|
| 81 |
cov_matrix = self.risk_model.compute_covariance_matrix(returns)
|
| 82 |
|
| 83 |
+
# 4. Get Benchmark Data (Realistic S&P 500 Proxy)
|
| 84 |
+
# We assume the Driver stocks carry their heavy weight, and the rest is distributed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
n_assets = len(valid_tickers)
|
| 87 |
benchmark_weights = pd.Series(0.0, index=valid_tickers)
|
| 88 |
|
| 89 |
+
# Assign distinct weights to known Drivers if they are in our data
|
| 90 |
+
# Approximate Mag 7 weights (or use market cap ratio if we had total cap)
|
| 91 |
+
# Using a proxy distribution logic:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
# Calculate Total Cap of our universe subset to see relative sizing
|
| 94 |
+
subset_caps = {t: valid_caps.get(t, 1e9) for t in valid_tickers}
|
| 95 |
+
total_subset_cap = sum(subset_caps.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
+
# If we are missing 400 stocks, we can't normalize to 1.0 perfectly *relative to SPX*
|
| 98 |
+
# But for the Optimizer's math (Port vs Bench), both must sum to 1.0 within the optimization universe?
|
| 99 |
+
# NO. If we want TE against full SPX, we need to handle the "missing" variance.
|
| 100 |
+
# But simpler: Normalize weights within the Available Universe based on Cap.
|
| 101 |
+
|
| 102 |
+
# For "Smallest 50" strategy:
|
| 103 |
+
# The Drivers (AAPL, etc.) are in `valid_tickers` now.
|
| 104 |
+
# So Benchmark will be Cap Weighted (90% Drivers, 10% Small Caps).
|
| 105 |
+
# Portfolio will be Constrained to 0% Drivers.
|
| 106 |
+
# Result -> Huge TE. Correct.
|
| 107 |
|
| 108 |
+
for t in valid_tickers:
|
| 109 |
+
benchmark_weights[t] = subset_caps[t] / total_subset_cap
|
| 110 |
+
|
| 111 |
# 5. Optimize Portfolio
|
| 112 |
sector_map = self.data_engine.get_sector_map()
|
| 113 |
|
| 114 |
+
# If Strategy requires excluding the "Drivers" (because they aren't in the Target set)
|
| 115 |
+
# We must add them to 'excluded_tickers' for the Optimizer
|
| 116 |
+
|
| 117 |
+
final_exclusions = list(request.excluded_tickers)
|
| 118 |
+
|
| 119 |
+
if request.strategy == "smallest_market_cap":
|
| 120 |
+
# Exclude anything that IS NOT in the target list (i.e. exclude the Drivers)
|
| 121 |
+
# targets from above
|
| 122 |
+
targets = sorted_by_cap[:request.top_n]
|
| 123 |
+
for t in valid_tickers:
|
| 124 |
+
if t not in targets:
|
| 125 |
+
final_exclusions.append(t)
|
| 126 |
+
|
| 127 |
+
# ... logic for largest ...
|
| 128 |
+
if request.strategy == "largest_market_cap":
|
| 129 |
+
# Drivers are likely IN the target set, so no extra exclusion needed usually
|
| 130 |
+
pass
|
| 131 |
+
|
| 132 |
opt_result = self.optimizer.optimize_portfolio(
|
| 133 |
covariance_matrix=cov_matrix,
|
| 134 |
tickers=valid_tickers,
|
| 135 |
benchmark_weights=benchmark_weights,
|
| 136 |
sector_map=sector_map,
|
| 137 |
excluded_sectors=request.excluded_sectors,
|
| 138 |
+
excluded_tickers=final_exclusions,
|
| 139 |
max_weight=request.max_weight
|
| 140 |
)
|
| 141 |
|