Tulitula committed on
Commit
5083e17
·
verified ·
1 Parent(s): 550565d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +700 -127
app.py CHANGED
@@ -1,134 +1,707 @@
1
- # app.py - Part 1
 
 
 
2
 
3
- import pandas as pd
4
  import numpy as np
5
- import yfinance as yf
 
6
  import gradio as gr
7
- from itertools import combinations_with_replacement
8
-
9
- # -------------------
10
- # Helper functions
11
- # -------------------
12
-
13
- def fetch_live_data(tickers, period="1y"):
14
- """Fetch historical adjusted close prices for given tickers."""
15
- data = yf.download(tickers, period=period)["Adj Close"]
16
- return data.dropna()
17
-
18
- def calculate_portfolio_metrics(weights, mean_returns, cov_matrix, risk_free_rate=0.045):
19
- """Return expected portfolio return, volatility, and beta."""
20
- weights = np.array(weights)
21
- portfolio_return = np.sum(mean_returns * weights)
22
- portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
23
- beta = np.sum(weights) # Placeholder if no real beta calc
24
- return portfolio_return, portfolio_volatility, beta
25
-
26
- def generate_synthetic_portfolios(tickers, num_portfolios=1000):
27
- """Generate synthetic portfolios from live data for given tickers."""
28
- df_prices = fetch_live_data(tickers)
29
- returns = df_prices.pct_change().dropna()
30
- mean_returns = returns.mean()
31
- cov_matrix = returns.cov()
32
-
33
- synthetic_data = []
34
- for _ in range(num_portfolios):
35
- weights = np.random.random(len(tickers))
36
- weights /= np.sum(weights)
37
- er, sigma, beta = calculate_portfolio_metrics(weights, mean_returns, cov_matrix)
38
- synthetic_data.append({
39
- "weights": weights,
40
- "er_p": er,
41
- "sigma_p": sigma,
42
- "beta_p": beta
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- return pd.DataFrame(synthetic_data)
46
-
47
- def select_risk_profiles(synth_df):
48
- """Select high/high, medium/medium, low/low risk profiles from synthetic dataset."""
49
- high = synth_df.sort_values("er_p", ascending=False).iloc[0]
50
- low = synth_df.sort_values("sigma_p", ascending=True).iloc[0]
51
-
52
- median_idx = ((synth_df["sigma_p"] - synth_df["sigma_p"].median()).abs() +
53
- (synth_df["er_p"] - synth_df["er_p"].median()).abs()).idxmin()
54
- medium = synth_df.loc[median_idx]
55
-
56
- return high, medium, low
57
-
58
- def find_efficient_same_sigma(user_er, user_sigma, synth_df):
59
- """Find portfolio with same sigma but highest return."""
60
- close_sigma = synth_df[np.isclose(synth_df["sigma_p"], user_sigma, atol=0.002)]
61
- if close_sigma.empty:
62
- return synth_df.iloc[0]
63
- return close_sigma.sort_values("er_p", ascending=False).iloc[0]
64
-
65
- def find_efficient_same_return(user_er, user_sigma, synth_df):
66
- """Find portfolio with same return but lowest sigma."""
67
- close_return = synth_df[np.isclose(synth_df["er_p"], user_er, atol=0.002)]
68
- if close_return.empty:
69
- return synth_df.iloc[0]
70
- return close_return.sort_values("sigma_p", ascending=True).iloc[0]
71
- # -------------------
72
- # Main compute function
73
- # -------------------
74
-
75
- def compute(user_tickers):
76
- # Convert comma-separated string into ticker list
77
- tickers = [t.strip().upper() for t in user_tickers.split(",") if t.strip()]
78
- if len(tickers) < 2:
79
- return "Please enter at least two tickers.", None
80
-
81
- # Fetch live data & compute user portfolio metrics (equal weights for now)
82
- df_prices = fetch_live_data(tickers)
83
- if df_prices.empty:
84
- return "Could not fetch data. Check tickers.", None
85
-
86
- returns = df_prices.pct_change().dropna()
87
- mean_returns = returns.mean()
88
- cov_matrix = returns.cov()
89
- user_weights = np.ones(len(tickers)) / len(tickers)
90
- user_er, user_sigma, user_beta = calculate_portfolio_metrics(user_weights, mean_returns, cov_matrix)
91
-
92
- # Generate synthetic dataset
93
- synth_df = generate_synthetic_portfolios(tickers, num_portfolios=1000)
94
-
95
- # Select profiles
96
- eff_sigma = find_efficient_same_sigma(user_er, user_sigma, synth_df)
97
- eff_return = find_efficient_same_return(user_er, user_sigma, synth_df)
98
- high, medium, low = select_risk_profiles(synth_df)
99
-
100
- # Prepare results DataFrame
101
- portfolios = {
102
- "User Portfolio": [user_er, user_sigma, user_beta, user_weights],
103
- "Efficient (Same Sigma)": [eff_sigma.er_p, eff_sigma.sigma_p, eff_sigma.beta_p, eff_sigma.weights],
104
- "Efficient (Same Return)": [eff_return.er_p, eff_return.sigma_p, eff_return.beta_p, eff_return.weights],
105
- "High Risk / High Return": [high.er_p, high.sigma_p, high.beta_p, high.weights],
106
- "Medium Risk / Medium Return": [medium.er_p, medium.sigma_p, medium.beta_p, medium.weights],
107
- "Low Risk / Low Return": [low.er_p, low.sigma_p, low.beta_p, low.weights],
108
- }
109
-
110
- df_out = pd.DataFrame(portfolios, index=["Expected Return", "Sigma", "Beta", "Weights"])
111
-
112
- return df_out.to_markdown(), df_out
113
-
114
- # -------------------
115
- # Gradio Interface
116
- # -------------------
117
-
118
- with gr.Blocks() as demo:
119
- gr.Markdown("## Portfolio Optimizer and Risk Profiles")
120
- tickers_input = gr.Textbox(label="Enter tickers (comma separated)", placeholder="AAPL, MSFT, GOOG")
121
- output_md = gr.Markdown()
122
- output_df = gr.Dataframe(headers=["Portfolio", "Value"], interactive=False)
123
-
124
- def run_and_display(tickers):
125
- md, df = compute(tickers)
126
- if df is None:
127
- return md, None
128
- return md, df
129
-
130
- run_btn = gr.Button("Run Analysis")
131
- run_btn.click(fn=run_and_display, inputs=tickers_input, outputs=[output_md, output_df])
132
 
133
  if __name__ == "__main__":
134
- demo.launch()
 
 
1
+ import os, io, math, warnings
2
+ warnings.filterwarnings("ignore")
3
+
4
+ from typing import List, Tuple, Dict, Optional
5
 
 
6
  import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
  import gradio as gr
10
+ from PIL import Image
11
+ import requests
12
+ import yfinance as yf
13
+
14
+ from sklearn.neighbors import KNeighborsRegressor
15
+ from sklearn.preprocessing import StandardScaler
16
+
17
+ # ---------------- config ----------------
18
+ DATA_DIR = "data"
19
+ DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
20
+
21
+ MAX_TICKERS = 30
22
+ DEFAULT_LOOKBACK_YEARS = 5
23
+ MARKET_TICKER = "VOO"
24
+
25
+ POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
26
+ SUG_COLS = ["ticker", "suggested_weight_exposure"]
27
+
28
+ FRED_MAP = [
29
+ (1, "DGS1"),
30
+ (2, "DGS2"),
31
+ (3, "DGS3"),
32
+ (5, "DGS5"),
33
+ (7, "DGS7"),
34
+ (10, "DGS10"),
35
+ (20, "DGS20"),
36
+ (30, "DGS30"),
37
+ (100, "DGS30"),
38
+ ]
39
+
40
+ # ---------------- helpers ----------------
41
+ def ensure_data_dir():
42
+ os.makedirs(DATA_DIR, exist_ok=True)
43
+
44
+ def empty_positions_df():
45
+ return pd.DataFrame(columns=POS_COLS)
46
+
47
+ def empty_suggest_df():
48
+ return pd.DataFrame(columns=SUG_COLS)
49
+
50
def fred_series_for_horizon(years: float) -> str:
    """Map an investment horizon in years to the matching FRED Treasury yield series id."""
    # Clamp the horizon into the supported 1..100 year range.
    horizon = min(100.0, max(1.0, float(years)))
    # FRED_MAP is ordered by maturity; the first cutoff that covers the horizon wins.
    matching = (code for cutoff, code in FRED_MAP if horizon <= cutoff)
    return next(matching, "DGS30")
56
+
57
def fetch_fred_yield_annual(code: str) -> float:
    """Fetch the latest value of a FRED yield series and return it as an annual decimal.

    Args:
        code: FRED series id, e.g. "DGS10".

    Returns:
        Latest non-missing observation divided by 100 (FRED reports percent),
        or a 0.03 fallback when the request/parse fails or the series is empty.
    """
    # FRED CSV endpoint
    url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        df = pd.read_csv(io.StringIO(r.text))
        # Column 0 is the date; column 1 holds the yield. Missing days appear
        # as "." and become NaN under errors="coerce", then are dropped.
        s = pd.to_numeric(df.iloc[:, 1], errors="coerce").dropna()
        return float(s.iloc[-1] / 100.0) if len(s) else 0.03
    except Exception:
        # Best-effort: any network or parse failure falls back to a 3% default.
        return 0.03
68
+
69
def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
    """
    Fetch monthly adjusted Close for given tickers. Handles both single-ticker (Series)
    and multi-ticker (DataFrame) returns from yfinance and ensures columns are ticker symbols.

    Args:
        tickers: symbols to download (duplicates are removed, order preserved).
        years: lookback window length; a 7-day cushion is added to the start.

    Returns:
        DataFrame of monthly close prices, uppercase ticker columns,
        all-NaN rows dropped and gaps forward-filled. Empty DataFrame for no tickers.

    Fix: ``fillna(method="ffill")`` is deprecated since pandas 2.1 and removed
    in pandas 3.0; ``.ffill()`` is the supported equivalent.
    """
    if not tickers:
        return pd.DataFrame()

    start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
    end = pd.Timestamp.today(tz="UTC")

    df_close = yf.download(
        list(dict.fromkeys(tickers)),  # de-duplicate while preserving order
        start=start.date(),
        end=end.date(),
        interval="1mo",
        auto_adjust=True,
        progress=False
    )["Close"]

    # If a single ticker is requested, yfinance gives a Series named "Close".
    # Make it a DataFrame and name the column with the ticker symbol.
    if isinstance(df_close, pd.Series):
        df_close = df_close.to_frame()
        # name column if we know the ticker
        if len(tickers) == 1:
            df_close.columns = [tickers[0].upper()]

    # Standardize column names to uppercase tickers when possible.
    if isinstance(df_close.columns, pd.Index):
        df_close.columns = [str(c).upper() for c in df_close.columns]

    # Drop months where nothing traded, then forward-fill remaining gaps.
    df_close = df_close.dropna(how="all").ffill()
    return df_close
103
+
104
def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Convert a monthly price panel to simple returns, dropping rows with any NaN.

    ``fill_method=None`` avoids pandas' deprecated implicit forward-fill inside
    ``pct_change`` (FutureWarning since 2.1, removed in 3.0); callers already
    forward-fill prices in ``fetch_prices_monthly`` so the result is unchanged.
    """
    return prices.pct_change(fill_method=None).dropna()
106
+
107
def annualize_mean(m):
    """Scale monthly mean return(s) to annual terms (x12). Accepts scalars or arrays."""
    monthly = np.asarray(m, dtype=float)
    return monthly * 12.0
109
+
110
def annualize_sigma(s):
    """Scale monthly volatility to annual terms via the square-root-of-time rule."""
    monthly = np.asarray(s, dtype=float)
    return monthly * math.sqrt(12.0)
112
+
113
def yahoo_search(query: str):
    """Query Yahoo Finance's symbol-search endpoint.

    Returns up to 10 dicts with keys ``symbol``/``name``/``exchange``. On any
    failure, or when no ASCII-symbol hits survive filtering, falls back to
    treating the raw query itself as a typed symbol so the UI always has an option.
    """
    # Yahoo symbol search
    if not query or len(query.strip()) == 0:
        return []
    url = "https://query1.finance.yahoo.com/v1/finance/search"
    params = {"q": query.strip(), "quotesCount": 10, "newsCount": 0}
    # NOTE(review): a browser-like User-Agent is sent — presumably Yahoo
    # rejects default client UAs; confirm if requests start failing.
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, params=params, headers=headers, timeout=10)
        r.raise_for_status()
        data = r.json()
        out = []
        for q in data.get("quotes", []):
            sym = q.get("symbol")
            name = q.get("shortname") or q.get("longname") or ""
            exch = q.get("exchDisp") or ""
            # Keep only ASCII symbols (safer for downstream yfinance calls).
            if sym and sym.isascii():
                out.append({"symbol": sym, "name": name, "exchange": exch})
        if not out:
            out = [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n/a"}]
        return out[:10]
    except Exception:
        # Best-effort fallback: offer the query itself as a symbol.
        return [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n/a"}]
136
+
137
def validate_tickers(symbols: List[str], years: int) -> List[str]:
    """Return the subset of ``symbols`` (uppercased) for which price data exists.

    Downloads prices once for the de-duplicated set, then keeps each input
    symbol whose uppercase form appears as a column. Input order is preserved;
    duplicates in the input produce duplicates in the output.
    """
    ok, df = [], fetch_prices_monthly(list(set(symbols)), years)
    for s in symbols:
        if s.upper() in df.columns:
            ok.append(s.upper())
    return ok
143
+
144
+ # -------------- aligned moments --------------
145
def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
    """Download monthly returns for ``symbols`` plus the market ticker, row-aligned.

    De-duplicates symbols (preserving order), appends MARKET_TICKER last, keeps
    only months where every column has data, and drops any duplicated columns
    yfinance may produce.
    """
    uniq = [c.upper() for c in dict.fromkeys(symbols) if c.upper() != MARKET_TICKER]
    tickers = uniq + [MARKET_TICKER]
    px = fetch_prices_monthly(tickers, years)
    rets = monthly_returns(px)
    # Requested assets first, market last; tolerate symbols the download missed.
    cols = [c for c in uniq if c in rets.columns] + ([MARKET_TICKER] if MARKET_TICKER in rets.columns else [])
    R = rets[cols].dropna(how="any")
    return R.loc[:, ~R.columns.duplicated()]
153
+
154
def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
    """Estimate CAPM inputs from aligned monthly returns.

    Args:
        symbols: tickers to estimate for (market added automatically upstream).
        years: lookback window in years.
        rf_ann: annual risk-free rate (decimal).

    Returns:
        dict with keys:
          "betas"       - per-asset beta vs. the market (excess-return slope)
          "cov_ann"     - annualized covariance of non-market assets (monthly x12)
          "erp_ann"     - annualized equity risk premium (market mean minus rf)
          "sigma_m_ann" - annualized market volatility

    Raises:
        ValueError: when the market column is missing or fewer than 3 aligned rows.
    """
    R = get_aligned_monthly_returns(symbols, years)
    if MARKET_TICKER not in R.columns or R.shape[0] < 3:
        raise ValueError("Not enough aligned data for market/tickers")
    rf_m = rf_ann / 12.0  # monthly risk-free rate

    m = R[MARKET_TICKER]
    # Duplicate market columns can survive upstream joins; collapse to one Series.
    if isinstance(m, pd.DataFrame):
        m = m.iloc[:, 0].squeeze()

    mu_m_ann = float(annualize_mean(m.mean()))
    sigma_m_ann = float(annualize_sigma(m.std(ddof=1)))
    erp_ann = float(mu_m_ann - rf_ann)

    ex_m = m - rf_m
    var_m = float(np.var(ex_m.values, ddof=1))
    var_m = max(var_m, 1e-6)  # floor to avoid divide-by-near-zero in beta

    betas: Dict[str, float] = {}
    for s in [c for c in R.columns if c != MARKET_TICKER]:
        ex_s = R[s] - rf_m
        # Beta = Cov(excess asset, excess market) / Var(excess market).
        betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)

    betas[MARKET_TICKER] = 1.0  # by definition

    asset_cols = [c for c in R.columns if c != MARKET_TICKER]
    # Monthly covariance annualized by x12; empty matrix if only the market was requested.
    cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
    covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)

    return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
184
+
185
def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
    """CAPM expected return: risk-free rate plus beta times the equity risk premium."""
    expected = rf_ann + beta * erp_ann
    return float(expected)
187
+
188
def portfolio_stats(weights: Dict[str, float],
                    cov_ann: pd.DataFrame,
                    betas: Dict[str, float],
                    rf_ann: float,
                    erp_ann: float) -> Tuple[float, float, float]:
    """Compute (beta, expected return, sigma) for a signed-weight portfolio.

    Weights are normalized by gross exposure, expected return follows CAPM,
    and sigma is the quadratic form over the annualized covariance restricted
    to the portfolio's tickers. A zero-gross portfolio yields (0, 0, 0).
    """
    names = list(weights)
    raw = np.asarray([weights[n] for n in names], dtype=float)
    gross = float(np.abs(raw).sum())
    if gross == 0:
        # Degenerate portfolio: no exposure at all.
        return 0.0, 0.0, 0.0
    expo = raw / gross
    # Portfolio beta is the exposure-weighted sum of per-asset betas (missing -> 0).
    beta_vec = np.asarray([betas.get(n, 0.0) for n in names], dtype=float)
    beta_p = float(beta_vec @ expo)
    # CAPM line: rf + beta * equity risk premium (capm_er inlined).
    er_p = float(rf_ann + beta_p * erp_ann)
    # Quadratic form w' C w over the aligned covariance; clamp tiny negatives
    # from floating-point noise before the square root.
    cov = cov_ann.reindex(index=names, columns=names).fillna(0.0).to_numpy()
    var_p = float(expo @ cov @ expo)
    sigma_p = math.sqrt(max(var_p, 0.0))
    return beta_p, er_p, sigma_p
204
+
205
+ # -------------- CML helpers --------------
206
def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
    """Mix of market and bills on the CML that matches a target sigma.

    Returns (market weight, bills weight, expected return). When the market
    sigma is effectively zero the CML is degenerate and everything goes to bills.
    """
    if sigma_mkt <= 1e-12:
        return 0.0, 1.0, rf_ann
    market_w = sigma_target / sigma_mkt
    bills_w = 1.0 - market_w
    expected = rf_ann + market_w * erp_ann
    return market_w, bills_w, expected
211
+
212
def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
    """Mix of market and bills on the CML that matches a target expected return.

    Returns (market weight, bills weight, sigma). With a ~zero equity risk
    premium the CML is flat, so the answer collapses to all bills.
    """
    if abs(erp_ann) <= 1e-12:
        return 0.0, 1.0, rf_ann
    market_w = (mu_target - rf_ann) / erp_ann
    bills_w = 1.0 - market_w
    sigma = abs(market_w) * sigma_mkt
    return market_w, bills_w, sigma
217
+
218
def plot_cml(
    rf_ann, erp_ann, sigma_mkt,
    pt_sigma, pt_mu,
    same_sigma_sigma, same_sigma_mu,
    same_mu_sigma, same_mu_mu,
    targ_sigma=None, targ_mu=None
) -> Image.Image:
    """Render the Capital Market Line with the user portfolio and efficient alternatives.

    Plots the CML through the market point, scatters the risk-free point, the
    market, the user portfolio, the two efficient alternatives, and (optionally)
    a target suggestion; dashed guides annotate the return/risk gaps. Returns
    the figure as a PIL Image (rendered to an in-memory PNG).
    """
    fig = plt.figure(figsize=(6, 4), dpi=120)

    # X-axis extends past every plotted point (0.3 floor keeps tiny cases readable).
    xmax = max(
        0.3,
        sigma_mkt * 2.0,
        pt_sigma * 1.4,
        same_mu_sigma * 1.4,
        same_sigma_sigma * 1.4,
        (targ_sigma or 0.0) * 1.4,
    )
    xs = np.linspace(0, xmax, 160)
    # CML slope is the market Sharpe ratio; guard against zero market sigma.
    slope = erp_ann / max(sigma_mkt, 1e-12)
    cml = rf_ann + slope * xs
    plt.plot(xs, cml, label="CML through VOO")

    plt.scatter([0.0], [rf_ann], label="Risk free")
    plt.scatter([sigma_mkt], [rf_ann + erp_ann], label="Market VOO")
    plt.scatter([pt_sigma], [pt_mu], label="Your portfolio")
    plt.scatter([same_sigma_sigma], [same_sigma_mu], label="Efficient same sigma")
    plt.scatter([same_mu_sigma], [same_mu_mu], label="Efficient same return")
    if targ_sigma is not None and targ_mu is not None:
        plt.scatter([targ_sigma], [targ_mu], label="Target suggestion")

    # Gap guides
    plt.plot([pt_sigma, same_sigma_sigma], [pt_mu, same_sigma_mu], linestyle="--", linewidth=1.2, alpha=0.7, color="gray")
    d_ret = (same_sigma_mu - pt_mu) * 100.0  # percentage points of extra return
    plt.annotate(
        f"Return gain at same sigma {d_ret:+.2f}%",
        xy=(same_sigma_sigma, same_sigma_mu),
        xytext=(same_sigma_sigma + 0.02 * xmax, same_sigma_mu),
        arrowprops=dict(arrowstyle="->", lw=1.0),
        fontsize=9,
        va="center",
    )

    plt.plot([pt_sigma, same_mu_sigma], [pt_mu, same_mu_mu], linestyle="--", linewidth=1.2, alpha=0.7, color="gray")
    d_sig = (same_mu_sigma - pt_sigma) * 100.0  # percentage points of risk change
    plt.annotate(
        f"Risk change at same return {d_sig:+.2f}%",
        xy=(same_mu_sigma, same_mu_mu),
        xytext=(same_mu_sigma, same_mu_mu + 0.03),
        arrowprops=dict(arrowstyle="->", lw=1.0),
        fontsize=9,
        ha="center",
    )

    plt.xlabel("Standard deviation")
    plt.ylabel("Expected return")
    plt.legend(loc="best")
    plt.tight_layout()

    # Render to PNG in memory and hand back a PIL image; close the figure to
    # avoid leaking matplotlib state across calls.
    buf = io.BytesIO()
    plt.savefig(buf, format="png")
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
281
+
282
+ # -------------- synthetic dataset --------------
283
+ def synth_profile(seed: int) -> str:
284
+ rng = np.random.default_rng(seed)
285
+ risk = rng.choice(["cautious", "balanced", "moderate", "growth", "aggressive"])
286
+ horizon = rng.choice(["three years", "five years", "seven years", "ten years", "fifteen years"])
287
+ goal = rng.choice(["retirement savings", "first home", "education fund", "wealth building", "travel fund", "emergency buffer"])
288
+ return f"{risk} investor, {horizon} horizon, goal is {goal}."
289
+
290
def build_synthetic_dataset(universe: List[str], years: int, rf_ann: float, erp_ann: float) -> pd.DataFrame:
    """Generate 1000 random long/short portfolios over ``universe`` with CAPM stats.

    Each portfolio picks 2..8 distinct assets, draws Dirichlet magnitudes,
    random signs (75% long), and a gamma-distributed gross leverage >= 1.
    The rng is seeded, so the dataset is reproducible for a given universe.
    Columns: id, profile_text, tickers, weights (both comma-joined), beta_p,
    er_p, sigma_p.
    """
    symbols = list(sorted(set([s for s in universe if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
    moms = estimate_all_moments_aligned(symbols, years, rf_ann)
    covA, betas = moms["cov_ann"], moms["betas"]
    rows, rng = [], np.random.default_rng(123)
    for i in range(1000):
        # Between 2 and 8 assets, bounded by the universe size.
        k = rng.integers(low=min(2, len(symbols)), high=min(8, len(symbols)) + 1)
        picks = list(rng.choice(symbols, size=k, replace=False))
        signs = rng.choice([-1.0, 1.0], size=k, p=[0.25, 0.75])
        raw = rng.dirichlet(np.ones(k))
        gross = 1.0 + float(rng.gamma(2.0, 0.5))  # gross leverage, always >= 1
        w = gross * signs * raw
        # NOTE(review): the erp_ann parameter is used here rather than
        # moms["erp_ann"] recomputed above — confirm this is intentional.
        beta_p, er_p, sigma_p = portfolio_stats({picks[j]: w[j] for j in range(k)}, covA, betas, rf_ann, erp_ann)
        rows.append({
            "id": i,
            "profile_text": synth_profile(10_000 + i),
            "tickers": ",".join(picks),
            "weights": ",".join(f"{x:.4f}" for x in w),
            "beta_p": beta_p,
            "er_p": er_p,
            "sigma_p": sigma_p
        })
    return pd.DataFrame(rows)
313
+
314
def save_synth_csv(df: pd.DataFrame, path: str = DATASET_PATH):
    """Write the synthetic dataset to CSV, creating parent directories as needed.

    Fix: ``os.path.dirname(path)`` is ``""`` for a bare filename, and
    ``os.makedirs("")`` raises FileNotFoundError — only create directories
    when a parent component actually exists.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_csv(path, index=False)
317
+
318
+ # ----------- surrogate from saved CSV only -----------
319
+ def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
320
+ try:
321
+ ts = [t.strip() for t in str(row["tickers"]).split(",")]
322
+ ws = [float(x) for x in str(row["weights"]).split(",")]
323
+ wmap = {t: ws[i] for i, t in enumerate(ts) if i < len(ws)}
324
+ w = np.array([wmap.get(t, 0.0) for t in universe], dtype=float)
325
+ gross = float(np.sum(np.abs(w)))
326
+ if gross <= 1e-12:
327
+ return None
328
+ return w / gross
329
+ except Exception:
330
+ return None
331
+
332
def fit_surrogate_from_csv(csv_path: str, universe: List[str]):
    """Fit a KNN surrogate mapping exposure vectors -> (er, sigma, beta) from the saved CSV.

    Returns:
        (scaler, knn, n_rows) on success; (None, None, 0) when the file cannot
        be read or yields no usable rows.
    """
    try:
        df = pd.read_csv(csv_path)
    except Exception:
        return None, None, 0
    X_list, Y_list = [], []
    for _, r in df.iterrows():
        x = _row_to_exposures(r, universe)
        if x is None:
            continue  # skip malformed / zero-exposure rows
        y = np.array([float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"])], dtype=float)
        X_list.append(x); Y_list.append(y)
    if not X_list:
        return None, None, 0
    X = np.vstack(X_list); Y = np.vstack(Y_list)
    scaler = StandardScaler().fit(X)
    Xn = scaler.transform(X)
    k = min(25, len(Xn))  # cap neighbor count at the sample count
    knn = KNeighborsRegressor(n_neighbors=k, weights="distance")
    knn.fit(Xn, Y)
    return scaler, knn, len(Xn)
353
+
354
def predict_from_surrogate(amounts_map: Dict[str, float], universe: List[str],
                           scaler: StandardScaler, knn: KNeighborsRegressor):
    """Predict (er, sigma, beta) for a dollar-amount portfolio via the KNN surrogate.

    Amounts are normalized to gross-exposure weights aligned to ``universe``.
    Returns None when total gross exposure is effectively zero.
    """
    gross = sum(abs(v) for v in amounts_map.values())
    if gross <= 1e-12:
        return None
    w = np.array([amounts_map.get(t, 0.0) for t in universe], dtype=float) / gross
    # predict expects a 2D array; take the single row of predictions back out.
    yhat = knn.predict(scaler.transform([w]))[0]
    er_hat, sigma_hat, beta_hat = float(yhat[0]), float(yhat[1]), float(yhat[2])
    return er_hat, sigma_hat, beta_hat
363
+
364
+ # ----------- target search over synthetic dataset -----------
365
def target_best_from_synth(csv_path: str,
                           universe: List[str],
                           target_mu: Optional[float],
                           target_sigma: Optional[float]):
    """Scan the synthetic dataset for the portfolio closest to the requested targets.

    Distance is an equally-weighted sum of squared gaps over whichever of
    ``target_mu`` / ``target_sigma`` is provided.

    Returns:
        dict with keys "weights" (top-12 exposures by |weight|), "er", "sigma",
        "beta"; or None when the CSV is unreadable, no target is given, or no
        row parses.
    """
    try:
        df = pd.read_csv(csv_path)
    except Exception:
        return None

    if target_mu is None and target_sigma is None:
        return None

    # Parse every usable row into (exposures, er, sigma, beta, raw row).
    rows = []
    for _, r in df.iterrows():
        x = _row_to_exposures(r, universe)
        if x is None:
            continue
        rows.append((x, float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"]), r))

    if not rows:
        return None

    # Equal weighting of the two objectives; kept as named constants for tuning.
    mu_w = 1.0
    sig_w = 1.0
    best = None
    best_d = float("inf")
    for x, er_p, sig_p, beta_p, r in rows:
        d = 0.0
        if target_mu is not None:
            d += mu_w * (er_p - target_mu) ** 2
        if target_sigma is not None:
            d += sig_w * (sig_p - target_sigma) ** 2
        if d < best_d:
            best_d = d
            best = (x, er_p, sig_p, beta_p, r)

    if best is None:
        return None

    x, er_p, sig_p, beta_p, r = best
    # Keep only material exposures, largest |weight| first, capped at 12 names.
    wmap = {t: float(x[i]) for i, t in enumerate(universe) if abs(float(x[i])) > 1e-4}
    top = sorted(wmap.items(), key=lambda kv: -abs(kv[1]))[:12]
    wmap_top = dict(top)
    return {"weights": wmap_top, "er": er_p, "sigma": sig_p, "beta": beta_p}
409
+
410
+ # -------------- summary builder --------------
411
def fmt_pct(x: float) -> str:
    """Format a decimal fraction as a percentage string with two decimals."""
    return "{:.2f}%".format(x * 100)
413
+
414
def humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta):
    """Render the surrogate's predictions and their gaps vs. history as markdown bullets."""
    # "Close" tolerances: 0.5 percentage points on return/vol, 0.05 on beta.
    within_mu = abs(dmu) <= 0.005
    within_sig = abs(dsig) <= 0.005
    within_beta = abs(dbeta) <= 0.05
    bullets = [
        f"- Predicted annual return {fmt_pct(er_hat)} , difference {fmt_pct(dmu)}",
        f"- Predicted annual volatility {fmt_pct(sigma_hat)} , difference {fmt_pct(dsig)}",
        f"- Predicted beta {beta_hat:.2f} , difference {dbeta:+.02f}",
    ]
    if within_mu and within_sig and within_beta:
        verdict = "The synthetic model matches the historical calculation closely. You can trust these quick predictions for similar mixes."
    else:
        verdict = "The synthetic model is not very close here. Rely more on the historical calculation for this mix."
    bullets.extend(["", f"**Verdict** {verdict}"])
    return "\n".join(bullets)
427
+
428
def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
                     beta_p, er_p, sigma_p,
                     a_sigma, b_sigma, mu_eff_sigma,
                     a_mu, b_mu, sigma_eff_mu,
                     synth=None, synth_nrows: int = 0,
                     targ=None) -> str:
    """Assemble the markdown results summary shown in the UI.

    Sections: inputs, the user's portfolio stats, an optional synthetic
    (surrogate) prediction block, an optional target-driven suggestion, and
    the two efficient CML alternatives.

    Args:
        synth: optional tuple (er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta).
        targ: optional dict with keys "er", "sigma", "beta", "weights".
    """
    lines = []
    lines.append("### Inputs")
    lines.append(f"- Lookback years {lookback}")
    lines.append(f"- Horizon years {int(round(horizon))}")
    lines.append(f"- Risk free {fmt_pct(rf)} from {rf_code}")
    lines.append(f"- Market ERP {fmt_pct(erp)}")
    lines.append(f"- Market sigma {fmt_pct(sigma_mkt)}")
    lines.append("")
    lines.append("### Your portfolio")
    lines.append(f"- Beta {beta_p:.2f}")
    lines.append(f"- Sigma {fmt_pct(sigma_p)}")
    lines.append(f"- Expected return {fmt_pct(er_p)}")
    if synth is not None:
        er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta = synth
        lines.append("")
        lines.append("### Synthetic prediction from data/investor_profiles.csv")
        lines.append(f"- Samples used {synth_nrows}")
        lines.append(humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta))
    if targ is not None:
        lines.append("")
        lines.append("### Target driven suggestion from synthetic dataset")
        lines.append(f"- Suggested expected return {fmt_pct(targ['er'])}")
        lines.append(f"- Suggested sigma {fmt_pct(targ['sigma'])}")
        lines.append(f"- Suggested beta {targ['beta']:.2f}")
        pretty = ", ".join([f"{k} {v:+.2f}" for k, v in targ["weights"].items()])
        lines.append(f"- Weights, exposure terms {pretty}")
    lines.append("")
    lines.append("### Efficient alternatives on CML")
    lines.append("Efficient same sigma")
    lines.append(f"- Market weight {a_sigma:.2f} , Bills weight {b_sigma:.2f}")
    lines.append(f"- Expected return {fmt_pct(mu_eff_sigma)}")
    lines.append("Efficient same return")
    lines.append(f"- Market weight {a_mu:.2f} , Bills weight {b_mu:.2f}")
    lines.append(f"- Sigma {fmt_pct(sigma_eff_mu)}")
    return "\n".join(lines)
469
+
470
# -------------- app state on launch --------------
# Module-level session state, created once at import. NOTE(review):
# fetch_fred_yield_annual performs a network request at import time (falls
# back to 3% on failure), and these globals are shared across all users.
ensure_data_dir()
# Default instrument universe; compute() rebuilds it from the user's tickers.
UNIVERSE = [MARKET_TICKER, "QQQ", "XLK", "XLP", "XLE", "VNQ", "IEF", "HYG", "GLD", "EEM"]
HORIZON_YEARS = 5
RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
RF_ANN = fetch_fred_yield_annual(RF_CODE)
476
+
477
+ # -------------- gradio callbacks --------------
478
def search_tickers_cb(q: str):
    """Gradio callback: turn a search query into a status note plus dropdown options."""
    hits = yahoo_search(q)
    if not hits:
        return "No matches", []
    options = ["{} | {} | {}".format(h["symbol"], h["name"], h["exchange"]) for h in hits]
    return "Select a symbol and click Add", options
484
+
485
def add_symbol(selection: str, table: pd.DataFrame):
    """Gradio callback: append the dropdown selection to the positions table.

    Preserves previously entered dollar amounts, validates the combined list
    against available price data, and enforces the MAX_TICKERS cap.

    Returns:
        (updated table, status message).
    """
    if not selection:
        return table, "Pick a row from Matches first"
    # Dropdown entries look like "SYM | Name | Exchange".
    symbol = selection.split("|")[0].strip().upper()
    current = [] if table is None or len(table) == 0 else [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
    tickers = current if symbol in current else current + [symbol]
    val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
    tickers = [t for t in tickers if t in val]
    # Carry over amounts for tickers that survive validation.
    amt_map = {}
    if table is not None and len(table) > 0:
        for _, r in table.iterrows():
            t = str(r.get("ticker", "")).upper()
            if t in tickers:
                amt_map[t] = float(pd.to_numeric(r.get("amount_usd", 0.0), errors="coerce") or 0.0)
    new_table = pd.DataFrame({"ticker": tickers, "amount_usd": [amt_map.get(t, 0.0) for t in tickers]})
    msg = f"Added {symbol}" if symbol in tickers else f"{symbol} not valid"
    if len(new_table) > MAX_TICKERS:
        new_table = new_table.iloc[:MAX_TICKERS]
        msg = f"Reached max of {MAX_TICKERS}"
    return new_table, msg
505
+
506
def lock_ticker_column(tb: pd.DataFrame):
    """Gradio callback: sanitize the editable positions table.

    Uppercases tickers, coerces amounts to numbers, and drops tickers that
    fail validation — keeping each surviving ticker paired with ITS OWN amount.

    Fix: the previous version truncated the amount list positionally
    (``amounts[:len(tickers)]``), so removing an invalid ticker from the
    middle shifted every later amount onto the wrong ticker.
    """
    if tb is None or len(tb) == 0:
        return pd.DataFrame(columns=["ticker", "amount_usd"])
    tickers = [str(x).upper() for x in tb["ticker"].tolist()]
    amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
    # Pair tickers with their own amounts BEFORE filtering (first occurrence wins).
    amount_by_ticker = {}
    for t, a in zip(tickers, amounts):
        amount_by_ticker.setdefault(t, a)
    valid = set(validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS))
    kept = [t for t in tickers if t in valid]
    return pd.DataFrame({"ticker": kept, "amount_usd": [amount_by_ticker[t] for t in kept]})
515
+
516
def set_horizon(years: float):
    """Gradio callback: refresh the module-level horizon and risk-free state from FRED.

    Clamps the horizon to 1..100 years, resolves the matching FRED series,
    fetches its latest yield, and stores all three in module globals.
    NOTE(review): globals are shared across concurrent users of the app.
    """
    y = max(1.0, min(100.0, float(years)))
    code = fred_series_for_horizon(y)
    rf = fetch_fred_yield_annual(code)
    global HORIZON_YEARS, RF_CODE, RF_ANN
    HORIZON_YEARS = y
    RF_CODE = code
    RF_ANN = rf
    return f"Risk free series {code}. Latest annual rate {rf:.2%}. Dataset will use this rate on compute."
525
+
526
def compute(years_lookback: int, table: pd.DataFrame,
            target_mu: Optional[float], target_sigma: Optional[float],
            use_synth: bool):
    """Main Gradio callback: analyze the positions table end to end.

    Pipeline: validate tickers -> estimate CAPM moments -> portfolio stats ->
    efficient CML alternatives -> (optional) KNN surrogate prediction ->
    (optional) target-driven suggestion -> plot + markdown summary.

    Returns a 6-tuple matching the outputs wiring:
        (plot image | None, summary/status markdown, universe message,
         positions DataFrame, suggestions DataFrame, csv path | None).
    On early exits the first slot is None and the second carries the reason.
    """
    if table is None or len(table) == 0:
        return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), None

    # Normalize user input: uppercase tickers, numeric amounts.
    df = table.dropna()
    df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
    df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)

    symbols = [t for t in df["ticker"].tolist() if t]
    if len(symbols) == 0:
        return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), None

    symbols = validate_tickers(symbols, years_lookback)
    if len(symbols) == 0:
        return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), empty_suggest_df(), None

    # NOTE(review): mutating the module-level UNIVERSE makes the surrogate's
    # feature space depend on the last compute call — shared across users.
    global UNIVERSE
    UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]

    df = df[df["ticker"].isin(symbols)].copy()
    amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
    rf_ann = RF_ANN

    moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
    betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]

    gross = sum(abs(v) for v in amounts.values())
    if gross == 0:
        return None, "All amounts are zero", "Universe ok", empty_positions_df(), empty_suggest_df(), None
    weights = {k: v / gross for k, v in amounts.items()}

    beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)

    # Efficient alternatives on the CML: same risk / same return.
    a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
    a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)

    # ensure dataset exists once
    if not os.path.exists(DATASET_PATH):
        synth_df = build_synthetic_dataset(
            universe=list(sorted(set(symbols + [MARKET_TICKER]))),
            years=DEFAULT_LOOKBACK_YEARS,
            rf_ann=rf_ann,
            erp_ann=erp_ann
        )
        save_synth_csv(synth_df)
    csv_path = DATASET_PATH if os.path.exists(DATASET_PATH) else None

    # Optional KNN surrogate prediction, compared against the historical stats.
    scaler, knn, nrows = None, None, 0
    synth_tuple = None
    if use_synth and csv_path:
        scaler, knn, nrows = fit_surrogate_from_csv(csv_path, UNIVERSE)
        if scaler is not None and knn is not None:
            pred = predict_from_surrogate(amounts, UNIVERSE, scaler, knn)
            if pred is not None:
                er_hat, sigma_hat, beta_hat = pred
                synth_tuple = (
                    er_hat, sigma_hat, beta_hat,
                    er_hat - er_p, sigma_hat - sigma_p, beta_hat - beta_p
                )

    # target driven suggestion from synthetic dataset
    targ = None
    targ_table = empty_suggest_df()
    targ_sigma_plot = None
    targ_mu_plot = None
    if csv_path and (target_mu is not None or target_sigma is not None):
        cand = target_best_from_synth(csv_path, UNIVERSE, target_mu, target_sigma)
        if cand is not None:
            targ = cand
            targ_sigma_plot = cand["sigma"]
            targ_mu_plot = cand["er"]
            rows = [{"ticker": k, "suggested_weight_exposure": v} for k, v in cand["weights"].items()]
            targ_table = pd.DataFrame(rows, columns=SUG_COLS)

    img = plot_cml(
        rf_ann, erp_ann, sigma_mkt,
        sigma_p, er_p,
        sigma_p, mu_eff_sigma,
        sigma_eff_mu, er_p,
        targ_sigma=targ_sigma_plot, targ_mu=targ_mu_plot
    )

    info = build_summary_md(
        years_lookback, HORIZON_YEARS, rf_ann, RF_CODE, erp_ann, sigma_mkt,
        beta_p, er_p, sigma_p,
        a_sigma, b_sigma, mu_eff_sigma,
        a_mu, b_mu, sigma_eff_mu,
        synth=synth_tuple, synth_nrows=nrows,
        targ=targ
    )

    # Per-position breakdown for the results table.
    rows = []
    for t in symbols:
        beta_val = 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
        rows.append({
            "ticker": t,
            "amount_usd": amounts.get(t, 0.0),
            "weight_exposure": weights.get(t, 0.0),
            "beta": beta_val,
        })
    pos_table = pd.DataFrame(rows, columns=POS_COLS)

    uni_msg = f"Universe set to {', '.join(UNIVERSE)}"
    return img, info, uni_msg, pos_table, targ_table, csv_path
632
+
633
# -------------- UI --------------
# Gradio layout and event wiring. Left column: symbol search, editable
# positions table, horizon/lookback/target inputs. Right column: CML plot,
# markdown summary, computed positions, suggestions, and the dataset path.
ensure_data_dir()

with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
    gr.Markdown(
        "## Efficient Portfolio Advisor\n"
        "Search symbols, enter dollar amounts, set your horizon. "
        "Prices come from Yahoo Finance. Risk free comes from FRED."
    )

    with gr.Row():
        with gr.Column(scale=1):
            q = gr.Textbox(label="Search symbol")
            search_note = gr.Markdown()
            matches = gr.Dropdown(choices=[], label="Matches")
            search_btn = gr.Button("Search")
            add_btn = gr.Button("Add selected to portfolio")

            gr.Markdown("### Portfolio positions (type dollar amounts; negatives allowed for shorts)")
            table = gr.Dataframe(
                headers=["ticker", "amount_usd"],
                datatype=["str", "number"],
                row_count=0,
                col_count=(2, "fixed")
            )

            horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
            lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta & sigma")

            gr.Markdown("### Optional targets on the CML")
            target_mu = gr.Number(label="Target expected return (annual, e.g. 0.12 = 12%)", value=None, precision=6)
            target_sigma = gr.Number(label="Target sigma (annual, e.g. 0.18 = 18%)", value=None, precision=6)
            use_synth = gr.Checkbox(label="Use synthetic predictor", value=True)

            run_btn = gr.Button("Compute and suggest")
        with gr.Column(scale=1):
            plot = gr.Image(label="Capital Market Line", type="pil")
            summary = gr.Markdown(label="Summary")
            universe_msg = gr.Textbox(label="Universe status", interactive=False)
            positions = gr.Dataframe(
                label="Computed positions",
                headers=POS_COLS,
                datatype=["str", "number", "number", "number"],
                col_count=(len(POS_COLS), "fixed"),
                value=empty_positions_df(),
                interactive=False
            )
            suggestions = gr.Dataframe(
                label="Suggested portfolio from targets",
                headers=SUG_COLS,
                datatype=["str", "number"],
                col_count=(len(SUG_COLS), "fixed"),
                value=empty_suggest_df(),
                interactive=False
            )
            dl = gr.File(label="Session CSV path", value=None, visible=True)

    def do_search(query):
        # Adapter: search_tickers_cb returns plain lists; the dropdown needs a gr.update.
        note, options = search_tickers_cb(query)
        return note, gr.update(choices=options)

    search_btn.click(fn=do_search, inputs=q, outputs=[search_note, matches])
    add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
    # Re-validate the table (and snap tickers to uppercase) on every edit.
    table.change(fn=lock_ticker_column, inputs=table, outputs=table)
    horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)

    run_btn.click(
        fn=compute,
        inputs=[lookback, table, target_mu, target_sigma, use_synth],
        outputs=[plot, summary, universe_msg, positions, suggestions, dl]
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
 
705
  if __name__ == "__main__":
706
+ # Disable SSR to avoid experimental issues in some deployments
707
+ demo.launch(ssr_mode=False)