Tulitula committed on
Commit
fbe9e4a
·
verified ·
1 Parent(s): 2e79685

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +404 -507
app.py CHANGED
@@ -1,67 +1,53 @@
1
- import os, io, math, json, warnings
2
- warnings.filterwarnings("ignore")
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- from typing import List, Tuple, Dict, Optional
5
 
6
  import numpy as np
7
  import pandas as pd
8
  import matplotlib.pyplot as plt
9
  from PIL import Image
 
 
10
  import requests
11
  import yfinance as yf
12
- import gradio as gr
13
 
14
- from sentence_transformers import SentenceTransformer
 
 
 
 
 
 
 
 
 
 
15
 
16
- # ==============================
17
- # Config
18
- # ==============================
19
  DATA_DIR = "data"
20
- DATASET_PATH = os.path.join(DATA_DIR, "investor_profiles.csv")
21
 
 
22
  MAX_TICKERS = 30
23
- DEFAULT_LOOKBACK_YEARS = 5
24
-
25
- # Try these in order for "market"
26
- MARKET_CANDIDATES = ["VOO", "SPY", "IVV"]
27
-
28
- # Gradio table schemas
29
- POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
30
- SUG_COLS = ["ticker", "weight_pct", "amount_usd"]
31
-
32
- # Globals (updated on events)
33
- HORIZON_YEARS = 5.0
34
- RF_CODE = "DGS5"
35
- RF_ANN = 0.03
36
-
37
- # Lazy-loaded embedding model
38
- _EMB_MODEL = None
39
-
40
- # ==============================
41
- # Small utils
42
- # ==============================
43
- def ensure_data_dir():
44
- os.makedirs(DATA_DIR, exist_ok=True)
45
-
46
- def fmt_pct(x: float) -> str:
47
- try:
48
- return f"{float(x)*100:.2f}%"
49
- except Exception:
50
- return "0.00%"
51
-
52
- def _pct(x):
53
- """Return x in percent; accepts float or numpy array."""
54
- return np.asarray(x, dtype=float) * 100.0
55
-
56
- def empty_positions_df():
57
- return pd.DataFrame(columns=POS_COLS)
58
 
59
- def empty_suggest_df():
60
- return pd.DataFrame(columns=SUG_COLS)
61
-
62
- # ==============================
63
- # Risk-free via FRED
64
- # ==============================
65
  FRED_MAP = [
66
  (1, "DGS1"),
67
  (2, "DGS2"),
@@ -71,9 +57,11 @@ FRED_MAP = [
71
  (10, "DGS10"),
72
  (20, "DGS20"),
73
  (30, "DGS30"),
74
- (100, "DGS30"),
75
  ]
76
 
 
 
77
  def fred_series_for_horizon(years: float) -> str:
78
  y = max(1.0, min(100.0, float(years)))
79
  for cutoff, code in FRED_MAP:
@@ -92,85 +80,103 @@ def fetch_fred_yield_annual(code: str) -> float:
92
  except Exception:
93
  return 0.03
94
 
95
- # ==============================
96
- # Prices & returns (robust to yfinance shapes)
97
- # ==============================
98
- def _extract_close(df: pd.DataFrame) -> pd.DataFrame:
99
- if isinstance(df, pd.Series):
100
- return df.to_frame()
101
- if isinstance(df.columns, pd.MultiIndex):
102
- for key in ["Close", "Adj Close"]:
103
- try:
104
- c = df.xs(key, axis=1, level=0)
105
- return c
106
- except Exception:
107
- pass
108
- lvl0 = list(dict.fromkeys(df.columns.get_level_values(0)))
109
- return df.xs(lvl0[0], axis=1, level=0)
110
- else:
111
- if "Close" in df.columns:
112
- return df[["Close"]]
113
- if "Adj Close" in df.columns:
114
- c = df[["Adj Close"]].copy()
115
- c.columns = ["Close"]
116
- return c
117
- return df
118
-
119
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
120
- tickers = list(dict.fromkeys([t for t in tickers if t]))
121
  if not tickers:
122
  return pd.DataFrame()
123
- start = (pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)).date()
124
- end = pd.Timestamp.today(tz="UTC").date()
125
- df = yf.download(
 
126
  tickers,
127
- start=start,
128
- end=end,
129
  interval="1mo",
130
  auto_adjust=True,
131
  progress=False,
132
  group_by="column"
133
  )
134
- if isinstance(df, pd.DataFrame):
135
- df = _extract_close(df)
136
- df = df.dropna(how="all").fillna(method="ffill")
137
- if df.shape[1] == 1:
138
- col = df.columns[0]
139
- if col in ("Close", "Adj Close"):
140
- if len(tickers) == 1:
141
- df.columns = [tickers[0]]
142
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
145
  return prices.pct_change().dropna(how="all")
146
 
147
- # ==============================
148
- # Aligned moments (market chosen dynamically)
149
- # ==============================
150
- def get_aligned_monthly_returns(symbols: List[str], years: int) -> Tuple[pd.DataFrame, str]:
151
- uniq = [c for c in dict.fromkeys(symbols)]
152
- want = list(dict.fromkeys(uniq + MARKET_CANDIDATES))
153
- px = fetch_prices_monthly(want, years)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  rets = monthly_returns(px)
155
- market = None
156
- for m in MARKET_CANDIDATES:
157
- if m in rets.columns:
158
- market = m
159
- break
160
- if market is None:
161
- raise ValueError("No market proxy (VOO/SPY/IVV) found in returned data.")
162
- cols = [c for c in uniq if c in rets.columns] + [market]
163
  R = rets[cols].dropna(how="any")
164
- R = R.loc[:, ~R.columns.duplicated()]
165
- return R, market
166
 
167
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
168
- R, market = get_aligned_monthly_returns(symbols, years)
169
- if market not in R.columns or R.shape[0] < 3:
170
- raise ValueError("Not enough aligned data.")
 
171
  rf_m = rf_ann / 12.0
172
 
173
- m = R[market]
174
  if isinstance(m, pd.DataFrame):
175
  m = m.iloc[:, 0].squeeze()
176
 
@@ -183,27 +189,19 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
183
  var_m = max(var_m, 1e-8)
184
 
185
  betas: Dict[str, float] = {}
186
- for s in [c for c in R.columns if c != market]:
187
  ex_s = R[s] - rf_m
188
- b = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
189
- betas[s] = b
190
- betas[market] = 1.0
191
-
192
- asset_cols = [c for c in R.columns if c != market]
193
- cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
194
- covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
195
-
196
- return {
197
- "betas": betas,
198
- "cov_ann": covA,
199
- "erp_ann": erp_ann,
200
- "sigma_m_ann": sigma_m_ann,
201
- "market": market,
202
- }
203
 
204
- # ==============================
205
- # Portfolio stats (CAPM)
206
- # ==============================
207
  def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
208
  return float(rf_ann + beta * erp_ann)
209
 
@@ -213,215 +211,187 @@ def portfolio_stats(weights: Dict[str, float],
213
  rf_ann: float,
214
  erp_ann: float) -> Tuple[float, float, float]:
215
  tickers = list(weights.keys())
216
- if not tickers:
217
- return 0.0, rf_ann, 0.0
218
  w = np.array([weights[t] for t in tickers], dtype=float)
219
  gross = float(np.sum(np.abs(w)))
220
- if gross == 0:
221
  return 0.0, rf_ann, 0.0
222
  w_expo = w / gross
223
  beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
224
- er_capm = capm_er(beta_p, rf_ann, erp_ann)
225
  cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
226
- sigma_p = math.sqrt(float(max(w_expo.T @ cov @ w_expo, 0.0)))
227
- return beta_p, er_capm, sigma_p
228
 
229
- # ==============================
230
- # Efficient points on the CML (back again)
231
- # ==============================
232
- def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
233
  if sigma_mkt <= 1e-12:
234
- return 0.0, 1.0, rf_ann
235
- a = sigma_target / sigma_mkt # market weight
236
- return a, 1.0 - a, rf_ann + a * erp_ann
237
-
238
- def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
239
- if abs(erp_ann) <= 1e-12:
240
- return 0.0, 1.0, rf_ann
241
- a = (mu_target - rf_ann) / erp_ann # market weight
242
- return a, 1.0 - a, abs(a) * sigma_mkt
243
-
244
- # ==============================
245
- # Plot CML with CAPM point (+ efficient points)
246
- # ==============================
247
- def plot_cml(rf_ann: float, erp_ann: float, sigma_mkt: float,
248
- user_beta: float,
249
- suggestion: Optional[Dict] = None,
250
- same_sigma_pt: Optional[Tuple[float, float]] = None,
251
- same_return_pt: Optional[Tuple[float, float]] = None) -> Image.Image:
252
- fig = plt.figure(figsize=(6.4, 4.2), dpi=120)
253
- slope = erp_ann / max(sigma_mkt, 1e-12)
254
- xmax = max(0.3, 2.0 * sigma_mkt)
255
- xs = np.linspace(0.0, xmax, 180)
256
- cml = rf_ann + slope * xs
257
- plt.plot(_pct(xs), _pct(cml), label="CML via Market", linewidth=1.8)
258
 
259
- # Risk-free & market
260
- plt.scatter([_pct(0.0)], [_pct(rf_ann)], label="Risk-free", s=25)
261
- plt.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market", s=25)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- # User CAPM point projected onto CML using sigma = |beta| * sigma_mkt
264
- sig_user = abs(user_beta) * sigma_mkt
265
- mu_user = capm_er(user_beta, rf_ann, erp_ann)
266
- plt.scatter([_pct(sig_user)], [_pct(mu_user)], label="Your CAPM point", s=35)
267
 
268
- # Efficient points
269
- if same_sigma_pt is not None:
270
- plt.scatter([_pct(same_sigma_pt[0])], [_pct(same_sigma_pt[1])], marker="^", s=40, label="Efficient (same σ)")
271
- if same_return_pt is not None:
272
- plt.scatter([_pct(same_return_pt[0])], [_pct(same_return_pt[1])], marker="s", s=40, label="Efficient (same return)")
273
 
274
- # Optional suggestion point
275
- if suggestion is not None:
276
- plt.scatter([_pct(float(suggestion["sigma"]))],
277
- [_pct(float(suggestion["er"]))],
278
- label="Selected Suggestion", marker="D", s=35)
279
 
280
- plt.xlabel("σ (annual, %)")
281
  plt.ylabel("Expected return (annual, %)")
282
- plt.legend(loc="best", fontsize=8)
283
  plt.tight_layout()
 
284
  buf = io.BytesIO()
285
  plt.savefig(buf, format="png")
286
  plt.close(fig)
287
  buf.seek(0)
288
  return Image.open(buf)
289
 
290
- # ==============================
291
- # Yahoo symbol search
292
- # ==============================
293
- def yahoo_search(query: str):
294
- if not query or len(query.strip()) == 0:
295
- return []
296
- url = "https://query1.finance.yahoo.com/v1/finance/search"
297
- params = {"q": query.strip(), "quotesCount": 10, "newsCount": 0}
298
- headers = {"User-Agent": "Mozilla/5.0"}
299
- try:
300
- r = requests.get(url, params=params, headers=headers, timeout=10)
301
- r.raise_for_status()
302
- data = r.json()
303
- out = []
304
- for q in data.get("quotes", []):
305
- sym = q.get("symbol")
306
- name = q.get("shortname") or q.get("longname") or ""
307
- exch = q.get("exchDisp") or ""
308
- if sym and sym.isascii():
309
- out.append({"symbol": sym, "name": name, "exchange": exch})
310
- if not out:
311
- out = [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n/a"}]
312
- return out[:10]
313
- except Exception:
314
- return [{"symbol": query.strip().upper(), "name": "typed symbol", "exchange": "n/a"}]
315
-
316
- def validate_tickers(symbols: List[str], years: int) -> List[str]:
317
- base = list(dict.fromkeys([s for s in symbols if s]))
318
- px = fetch_prices_monthly(base + MARKET_CANDIDATES, years)
319
- ok = [s for s in base if s in px.columns]
320
- return ok
321
-
322
- # ==============================
323
- # Synthetic dataset & suggestions
324
- # ==============================
325
- def synth_profile_text(beta: float, sigma: float, er: float, weights: Dict[str, float]) -> str:
326
- top = sorted(weights.items(), key=lambda kv: -abs(kv[1]))[:8]
327
- parts = [f"{k} {abs(v)*100:.1f}%" for k, v in top]
328
- return (
329
- f"portfolio with beta {beta:.2f}, volatility {sigma:.3f}, expected return {er:.3f}; "
330
- f"holdings: " + ", ".join(parts)
331
- )
332
 
 
333
  def build_synthetic_dataset(universe: List[str],
334
- rf_ann: float,
335
- erp_ann: float,
336
  betas: Dict[str, float],
337
- covA: pd.DataFrame,
338
- n_rows: int = 1000,
339
- seed: int = 123) -> pd.DataFrame:
340
- rng = np.random.default_rng(seed)
341
  rows = []
342
- assets = [t for t in universe] # long-only samples
343
  for i in range(n_rows):
344
- k = rng.integers(low=max(2, min(2, len(assets))), high=max(3, min(8, len(assets))) + 1)
345
- picks = list(rng.choice(assets, size=min(k, len(assets)), replace=False))
346
- raw = rng.dirichlet(np.ones(len(picks)))
347
- wmap = {picks[j]: float(raw[j]) for j in range(len(picks))}
348
- beta_p, er_capm, sigma_p = portfolio_stats(wmap, covA, betas, rf_ann, erp_ann)
 
 
349
  rows.append({
350
  "tickers": ",".join(picks),
351
- "weights": ",".join(f"{wmap[t]:.6f}" for t in picks),
352
  "beta": beta_p,
353
- "er": er_capm,
354
- "sigma": sigma_p,
355
- "desc": synth_profile_text(beta_p, sigma_p, er_capm, wmap),
356
  })
357
- df = pd.DataFrame(rows)
358
- return df
359
-
360
- def get_embedding_model():
361
- global _EMB_MODEL
362
- if _EMB_MODEL is None:
363
- _EMB_MODEL = SentenceTransformer("FinLang/finance-embeddings-investopedia")
364
- return _EMB_MODEL
365
-
366
- def encode_texts(texts: List[str]):
367
- model = get_embedding_model()
368
- return model.encode(texts, normalize_embeddings=True)
369
-
370
- def cosine_sim(a: np.ndarray, b: np.ndarray) -> np.ndarray:
371
- return (a @ b.T)
372
-
373
- def select_bucket_candidates(df: pd.DataFrame, bucket: str) -> pd.DataFrame:
374
- # bucket by sigma tertiles
375
- q1 = df["sigma"].quantile(1/3)
376
- q2 = df["sigma"].quantile(2/3)
377
- if bucket == "Low":
378
- return df[df["sigma"] <= q1]
379
- if bucket == "Medium":
380
- return df[(df["sigma"] > q1) & (df["sigma"] <= q2)]
381
- return df[df["sigma"] > q2]
382
-
383
- def parse_weights(row: pd.Series) -> Dict[str, float]:
384
- ts = [t.strip() for t in str(row["tickers"]).split(",")]
385
- ws = [float(x) for x in str(row["weights"]).split(",")]
386
- wmap = {ts[i]: ws[i] for i in range(min(len(ts), len(ws)))}
387
- s = sum(abs(v) for v in wmap.values()) or 1.0
388
- return {k: v / s for k, v in wmap.items()}
389
 
390
- def pick_top3_for_bucket(df: pd.DataFrame, bucket: str) -> List[Dict]:
391
- cand = select_bucket_candidates(df, bucket)
392
- if cand.empty:
393
- return []
394
- query_map = {
395
- "Low": "low risk, stable portfolio, conservative volatility",
396
- "Medium": "balanced risk portfolio, moderate volatility",
397
- "High": "high risk, growth portfolio, higher volatility"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  }
399
- q = query_map[bucket]
400
- embs_cand = encode_texts(cand["desc"].tolist())
401
- emb_q = encode_texts([q])[0].reshape(1, -1)
402
- sims = cosine_sim(emb_q, embs_cand).flatten()
403
- order = np.argsort(-sims)
404
- picks = []
405
- for idx in order[:3]:
406
- r = cand.iloc[int(idx)]
407
- wmap = parse_weights(r)
408
- picks.append({"weights": wmap, "beta": float(r["beta"]),
409
- "er": float(r["er"]), "sigma": float(r["sigma"])})
410
- return picks
411
-
412
- # ==============================
413
- # Gradio callbacks
414
- # ==============================
415
- def search_tickers_cb(q: str):
416
- hits = yahoo_search(q)
417
- if not hits:
418
- return "No matches", []
419
- opts = [f"{h['symbol']} | {h['name']} | {h['exchange']}" for h in hits]
420
- return "Select a symbol and click Add", opts
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  def add_symbol(selection: str, table: pd.DataFrame):
423
- if not selection:
424
- return table, "Pick a row from Matches first", gr.update(value=None)
425
  symbol = selection.split("|")[0].strip().upper()
426
  current = [] if table is None or len(table) == 0 else [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
427
  tickers = current if symbol in current else current + [symbol]
@@ -438,9 +408,9 @@ def add_symbol(selection: str, table: pd.DataFrame):
438
  if len(new_table) > MAX_TICKERS:
439
  new_table = new_table.iloc[:MAX_TICKERS]
440
  msg = f"Reached max of {MAX_TICKERS}"
441
- return new_table, msg, gr.update(value=None)
442
 
443
- def lock_ticker_column(tb: pd.DataFrame):
444
  if tb is None or len(tb) == 0:
445
  return pd.DataFrame(columns=["ticker", "amount_usd"])
446
  tickers = [str(x).upper() for x in tb["ticker"].tolist()]
@@ -460,203 +430,142 @@ def set_horizon(years: float):
460
  RF_ANN = rf
461
  return f"Risk-free series {code}. Latest annual rate {rf:.2%}."
462
 
463
- def build_summary_md(lookback, rf_code, rf, erp, sigma_mkt,
464
- beta_p, er_capm, sigma_cml_user,
465
- market_sym,
466
- a_sigma=None, b_sigma=None, mu_eff_sigma=None,
467
- a_mu=None, b_mu=None, sigma_eff_mu=None) -> str:
468
- lines = []
469
- lines.append("### Inputs")
470
- lines.append(f"- Lookback years {lookback}")
471
- lines.append(f"- Horizon years {int(round(HORIZON_YEARS))}")
472
- lines.append(f"- Risk-free {fmt_pct(rf)} from {rf_code}")
473
- lines.append(f"- Market ERP {fmt_pct(erp)}")
474
- lines.append(f"- Market σ {fmt_pct(sigma_mkt)} (proxy: {market_sym})")
475
- lines.append("")
476
- lines.append("### Your portfolio (CAPM)")
477
- lines.append(f"- Beta {beta_p:.2f}")
478
- lines.append(f"- Expected return (CAPM / SML) {fmt_pct(er_capm)}")
479
- lines.append(f"- σ on CML for your beta (|β|×σ_mkt) {fmt_pct(sigma_cml_user)}")
480
- if (a_sigma is not None) and (a_mu is not None):
481
- lines.append("")
482
- lines.append("### Efficient alternatives on the CML")
483
- lines.append(f"- Same σ as your CAPM point → Market {a_sigma:.2f}, Bills {b_sigma:.2f}, return {fmt_pct(mu_eff_sigma)}")
484
- lines.append(f"- Same expected return (your CAPM μ) → Market {a_mu:.2f}, Bills {b_mu:.2f}, σ {fmt_pct(sigma_eff_mu)}")
485
- return "\n".join(lines)
486
-
487
- def pack_suggestion_table(pick: Dict, gross_usd: float) -> pd.DataFrame:
488
- rows = []
489
- for t, w in sorted(pick["weights"].items(), key=lambda kv: -kv[1]):
490
- rows.append({
491
- "ticker": t,
492
- "weight_pct": float(w) * 100.0,
493
- "amount_usd": float(w) * float(gross_usd)
494
- })
495
- return pd.DataFrame(rows, columns=SUG_COLS)
496
-
497
- def suggestion_metrics_md(pick: Dict) -> str:
498
- return (
499
- f"**Suggested portfolio** \n"
500
- f"- Expected return (CAPM) {fmt_pct(pick['er'])} \n"
501
- f"- σ (annual) {fmt_pct(pick['sigma'])} \n"
502
- f"- Beta {pick['beta']:.2f}"
503
- )
504
-
505
- def compute(years_lookback: int,
506
- table: pd.DataFrame,
507
- risk_choice: str,
508
- pick_choice: str):
509
- # ---------- sanitize input table ----------
510
- if table is None or len(table) == 0:
511
- return None, "Add at least one ticker.", "Universe empty", empty_positions_df(), {}, gr.update(), gr.update(), "", empty_suggest_df()
512
-
513
- df = table.dropna()
514
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
515
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
 
516
  symbols = [t for t in df["ticker"].tolist() if t]
 
 
517
 
518
  symbols = validate_tickers(symbols, years_lookback)
519
  if len(symbols) == 0:
520
- return None, "Could not validate any tickers.", "Universe invalid", empty_positions_df(), {}, gr.update(), gr.update(), "", empty_suggest_df()
521
 
522
- # ---------- amounts & weights ----------
523
- amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows() if r["ticker"] in symbols}
524
- gross = sum(abs(v) for v in amounts.values())
525
- if gross == 0:
526
- return None, "All amounts are zero.", "Universe ok", empty_positions_df(), {}, gr.update(), gr.update(), "", empty_suggest_df()
527
- weights_user = {k: v / gross for k, v in amounts.items()}
528
 
529
- # ---------- risk-free & moments ----------
 
530
  rf_ann = RF_ANN
 
 
531
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
532
- betas, covA = moms["betas"], moms["cov_ann"]
533
- erp_ann, sigma_mkt, market_sym = moms["erp_ann"], moms["sigma_m_ann"], moms["market"]
534
 
535
- # ---------- user stats (CAPM) ----------
536
- beta_p, er_capm, _sigma_hist = portfolio_stats(weights_user, covA, betas, rf_ann, erp_ann)
537
- sigma_user_on_cml = abs(beta_p) * sigma_mkt # on CML
 
538
 
539
- # ---------- efficient CML points (back again) ----------
540
- a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_user_on_cml, rf_ann, erp_ann, sigma_mkt)
541
- a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_capm, rf_ann, erp_ann, sigma_mkt)
542
 
543
- # ---------- positions table ----------
544
- rows = []
545
- for t in symbols:
546
- rows.append({
547
- "ticker": t,
548
- "amount_usd": amounts.get(t, 0.0),
549
- "weight_exposure": weights_user.get(t, 0.0),
550
- "beta": 1.0 if abs(betas.get(t, 0.0) - 1.0) < 1e-9 else betas.get(t, np.nan)
551
- })
552
- pos_table = pd.DataFrame(rows, columns=POS_COLS)
553
-
554
- # ---------- synthetic dataset ----------
555
- ensure_data_dir()
556
- synth_df = build_synthetic_dataset(
557
- universe=list(sorted(set(symbols))),
558
- rf_ann=rf_ann,
559
- erp_ann=erp_ann,
560
- betas=betas,
561
- covA=covA,
562
- n_rows=1000,
563
- seed=123
564
- )
565
- try:
566
- synth_df.to_csv(DATASET_PATH, index=False)
567
- except Exception:
568
- pass
569
-
570
- # ---------- pick 3 per bucket using embeddings ----------
571
- low3 = pick_top3_for_bucket(synth_df, "Low")
572
- med3 = pick_top3_for_bucket(synth_df, "Medium")
573
- high3 = pick_top3_for_bucket(synth_df, "High")
574
-
575
- # ---------- build state ----------
576
- state = {
577
- "gross": float(gross),
578
- "picks": {"Low": low3, "Medium": med3, "High": high3},
579
- "rf": float(rf_ann),
580
- "erp": float(erp_ann),
581
- "sigma_mkt": float(sigma_mkt),
582
- "user_beta": float(beta_p),
583
- "same_sigma": (float(sigma_user_on_cml), float(mu_eff_sigma)),
584
- "same_return": (float(sigma_eff_mu), float(er_capm)),
585
- }
586
 
587
- # ---------- decide which suggestion to show initially ----------
588
- risk = risk_choice if risk_choice in ("Low", "Medium", "High") else "Medium"
589
- pick_idx = 0 if pick_choice not in ("Pick #1", "Pick #2", "Pick #3") else ["Pick #1", "Pick #2", "Pick #3"].index(pick_choice)
590
- picks_list = state["picks"].get(risk, [])
591
- pick = picks_list[pick_idx] if pick_idx < len(picks_list) else (picks_list[0] if picks_list else None)
592
 
593
- # ---------- plot ----------
594
- img = plot_cml(
595
- rf_ann, erp_ann, sigma_mkt, beta_p,
596
- suggestion=pick,
597
- same_sigma_pt=state["same_sigma"],
598
- same_return_pt=state["same_return"]
599
- )
600
-
601
- # ---------- summary ----------
602
- info = build_summary_md(
603
- years_lookback, RF_CODE, rf_ann, erp_ann, sigma_mkt,
604
- beta_p, er_capm, sigma_user_on_cml, market_sym,
605
- a_sigma=a_sigma, b_sigma=b_sigma, mu_eff_sigma=mu_eff_sigma,
606
- a_mu=a_mu, b_mu=b_mu, sigma_eff_mu=sigma_eff_mu
607
- )
608
 
609
- # ---------- suggestion UI ----------
610
- risk_update = gr.update(choices=["Low", "Medium", "High"], value=risk)
611
- pick_update = gr.update(choices=["Pick #1", "Pick #2", "Pick #3"], value="Pick #1")
612
 
613
- if pick is None:
614
- return img, info, f"Universe set to {', '.join(sorted(symbols))}", pos_table, state, risk_update, pick_update, "No suggestions available.", empty_suggest_df()
 
615
 
616
- sug_md = suggestion_metrics_md(pick)
617
- sug_table = pack_suggestion_table(pick, gross)
 
 
 
 
 
 
 
 
 
 
 
 
618
 
619
- return img, info, f"Universe set to {', '.join(sorted(symbols))}", pos_table, state, risk_update, pick_update, sug_md, sug_table
 
 
 
 
 
 
 
 
 
620
 
621
- def update_suggestion(risk: str, pick_name: str, state: dict):
622
- if not state or "picks" not in state:
623
- return gr.update(), "", empty_suggest_df()
624
- picks_list = state["picks"].get(risk, [])
625
- if not picks_list:
626
- return gr.update(), "No suggestions for this bucket.", empty_suggest_df()
627
- idx = ["Pick #1", "Pick #2", "Pick #3"].index(pick_name) if pick_name in ("Pick #1", "Pick #2", "Pick #3") else 0
628
- idx = min(idx, len(picks_list) - 1)
629
- pick = picks_list[idx]
630
  img = plot_cml(
631
- state["rf"], state["erp"], state["sigma_mkt"], state["user_beta"],
632
- suggestion=pick,
633
- same_sigma_pt=state.get("same_sigma"),
634
- same_return_pt=state.get("same_return")
635
  )
636
- sug_md = suggestion_metrics_md(pick)
637
- sug_table = pack_suggestion_table(pick, state.get("gross", 0.0))
638
- return img, sug_md, sug_table
639
-
640
- # ==============================
641
- # Build UI
642
- # ==============================
643
- ensure_data_dir()
644
- RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
645
- RF_ANN = fetch_fred_yield_annual(RF_CODE)
646
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
648
  gr.Markdown(
649
  "## Efficient Portfolio Advisor\n"
650
- "Search symbols, enter **dollar amounts**, set horizon. "
651
- "Returns use Yahoo Finance monthly data; risk-free from FRED. "
652
- "Plot shows **CAPM point on the CML** plus efficient CML points."
653
  )
654
 
655
  with gr.Row():
656
  with gr.Column(scale=1):
657
  q = gr.Textbox(label="Search symbol")
658
  search_note = gr.Markdown()
659
- matches = gr.Dropdown(choices=[], label="Matches", allow_custom_value=True)
660
  search_btn = gr.Button("Search")
661
  add_btn = gr.Button("Add selected to portfolio")
662
 
@@ -668,64 +577,52 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
668
  col_count=(2, "fixed")
669
  )
670
 
671
- horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
672
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for betas & covariances")
673
 
674
- run_btn = gr.Button("Compute")
 
 
 
 
 
675
 
676
  with gr.Column(scale=1):
677
  plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
678
- summary = gr.Markdown(label="Summary")
679
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
680
 
681
  positions = gr.Dataframe(
682
  label="Computed positions",
683
- headers=POS_COLS,
684
  datatype=["str", "number", "number", "number"],
685
- col_count=(len(POS_COLS), "fixed"),
686
  value=empty_positions_df(),
687
  interactive=False
688
  )
689
 
690
- gr.Markdown("### Dataset-based suggestions (choose risk bucket and pick)")
691
- state = gr.State({})
692
- risk_selector = gr.Radio(choices=["Low", "Medium", "High"], value="Medium", label="Risk bucket to view")
693
- pick_selector = gr.Radio(choices=["Pick #1", "Pick #2", "Pick #3"], value="Pick #1", label="Suggestion")
694
- sugg_metrics = gr.Markdown(label="Suggestion metrics")
695
- suggestions = gr.Dataframe(
696
- label="Suggested holdings",
697
- headers=SUG_COLS,
698
  datatype=["str", "number", "number"],
699
- col_count=(len(SUG_COLS), "fixed"),
700
- value=empty_suggest_df(),
701
  interactive=False
702
  )
703
 
704
- # --- wiring ---
705
- def do_search(query):
706
- note, options = search_tickers_cb(query)
707
- return note, gr.update(choices=options, value=None)
708
 
709
- search_btn.click(fn=do_search, inputs=q, outputs=[search_note, matches])
710
- add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note, matches])
711
- table.change(fn=lock_ticker_column, inputs=table, outputs=table)
 
712
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
713
 
 
714
  run_btn.click(
715
  fn=compute,
716
- inputs=[lookback, table, risk_selector, pick_selector],
717
- outputs=[plot, summary, universe_msg, positions, state, risk_selector, pick_selector, sugg_metrics, suggestions]
718
- )
719
-
720
- risk_selector.change(
721
- fn=update_suggestion,
722
- inputs=[risk_selector, pick_selector, state],
723
- outputs=[plot, sugg_metrics, suggestions]
724
- )
725
- pick_selector.change(
726
- fn=update_suggestion,
727
- inputs=[risk_selector, pick_selector, state],
728
- outputs=[plot, sugg_metrics, suggestions]
729
  )
730
 
731
  if __name__ == "__main__":
 
1
+ # app.py
2
+ # Efficient Portfolio Advisor — CAPM on CML + Low/Medium/High suggestion carousel
3
+ # - Search tickers, enter $ amounts (negatives allowed), pick horizon
4
+ # - Plot shows CAPM point on the CML (not historical)
5
+ # - Suggestions are sampled from a 1,000-row dataset generated from your universe
6
+ # - Carousel lets you flip between 3 suggestions in the chosen risk band
7
+ # - Optional: rerank suggestions with finance embeddings (FinLang) to be on-theme
8
+
9
+ import io
10
+ import os
11
+ import math
12
+ import json
13
+ import time
14
+ import warnings
15
+ from typing import Dict, List, Optional, Tuple
16
 
17
+ warnings.filterwarnings("ignore")
18
 
19
  import numpy as np
20
  import pandas as pd
21
  import matplotlib.pyplot as plt
22
  from PIL import Image
23
+
24
+ import gradio as gr
25
  import requests
26
  import yfinance as yf
 
27
 
# Optional embeddings: the model is loaded lazily on first use and then cached.
_EMBED_MODEL = None
# Set after a failed load so later calls return None immediately instead of
# repeating the import / model construction on every call.
_EMBED_LOAD_FAILED = False


def get_embed_model():
    """Return the shared finance sentence-embedding model, or None.

    The first call tries to import ``sentence_transformers`` and build the
    "FinLang/finance-embeddings-investopedia" model. On success the instance
    is cached and reused. On any failure the failure is remembered and this
    call and every later call return None without retrying.

    Returns:
        The cached ``SentenceTransformer`` instance, or None if it could not
        be loaded.
    """
    global _EMBED_MODEL, _EMBED_LOAD_FAILED
    if _EMBED_MODEL is None and not _EMBED_LOAD_FAILED:
        try:
            from sentence_transformers import SentenceTransformer
            _EMBED_MODEL = SentenceTransformer("FinLang/finance-embeddings-investopedia")
        except Exception:
            # Embeddings are an optional feature: swallow the error on purpose,
            # but record it so the expensive attempt is made only once.
            _EMBED_LOAD_FAILED = True
    return _EMBED_MODEL
39
 
40
+
# ---------------- Configuration ----------------
# Working directory for generated files; created at import time if missing.
DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)

MARKET_TICKER = "VOO"  # proxy for market

# Size limits and UI defaults.
MAX_TICKERS = 30
DEFAULT_LOOKBACK_YEARS = 10
DEFAULT_HORIZON_YEARS = 10
SYNTH_ROWS = 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
 
 
51
  FRED_MAP = [
52
  (1, "DGS1"),
53
  (2, "DGS2"),
 
57
  (10, "DGS10"),
58
  (20, "DGS20"),
59
  (30, "DGS30"),
60
+ (100,"DGS30"),
61
  ]
62
 
def ensure_dir(p):
    """Create directory *p* (and any missing parents); do nothing if it exists."""
    os.makedirs(p, exist_ok=True)
64
+
65
  def fred_series_for_horizon(years: float) -> str:
66
  y = max(1.0, min(100.0, float(years)))
67
  for cutoff, code in FRED_MAP:
 
80
  except Exception:
81
  return 0.03
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
    """Download monthly auto-adjusted close prices for *tickers* via yfinance.

    Args:
        tickers: Symbols to fetch. Falsy entries and entries that are blank
            after stripping are dropped; the rest are stripped, upper-cased
            and de-duplicated, keeping first-seen order.
        years: Lookback window in years (an extra 7 days is added).

    Returns:
        A DataFrame of close prices with upper-cased column names, rows that
        are entirely NaN removed and remaining gaps forward-filled. An empty
        DataFrame is returned when there are no usable tickers or the
        download yields nothing.
    """
    cleaned = (t.upper().strip() for t in tickers if t)
    # Filter again after stripping so a whitespace-only entry is not sent to
    # the download as an empty symbol.
    tickers = list(dict.fromkeys(c for c in cleaned if c))
    if not tickers:
        return pd.DataFrame()

    end = pd.Timestamp.today(tz="UTC")
    start = end - pd.DateOffset(years=years, days=7)

    raw = yf.download(
        tickers,
        start=start.date(),
        end=end.date(),
        interval="1mo",
        auto_adjust=True,
        progress=False,
        group_by="column",
    )
    if raw is None or len(raw) == 0:
        return pd.DataFrame()

    if isinstance(raw.columns, pd.MultiIndex):
        # With group_by="column": level 0 is the price field, level 1 the ticker.
        level0 = raw.columns.levels[0].tolist()
        if "Close" in level0:
            field = "Close"
        elif "Adj Close" in level0:
            field = "Adj Close"
        else:
            field = level0[0]
        closes = raw.xs(field, axis=1, level=0)
    elif "Close" in raw.columns:
        closes = raw[["Close"]]
    elif "Adj Close" in raw.columns:
        closes = raw[["Adj Close"]].rename(columns={"Adj Close": "Close"})
    else:
        closes = raw

    if isinstance(closes, pd.Series):
        closes = closes.to_frame()

    # Work on a copy so relabelling columns never touches the downloaded frame.
    closes = closes.copy()
    closes.columns = [str(c).upper() for c in closes.columns]

    # A single-ticker download with flat columns yields a column named after
    # the price field; relabel it with the ticker so callers that look up
    # columns by symbol can find it.
    if len(tickers) == 1 and list(closes.columns) == ["CLOSE"]:
        closes.columns = tickers

    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    return closes.dropna(how="all").ffill()
127
 
def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Turn a price table into row-over-row percentage changes.

    Rows in which every column is NaN are removed from the result.
    """
    changes = prices.pct_change()
    return changes.dropna(how="all")
130
 
def yahoo_search(query: str):
    """Search Yahoo Finance for symbols matching *query*.

    Args:
        query: Free-text search term. Falsy or blank input returns an empty
            list. Other values are converted with ``str()`` and stripped
            once, so the guard and the request use the same text.

    Returns:
        Up to 10 strings formatted as "SYMBOL | name | exchange". If the
        request fails, or no ASCII symbol comes back, the result is a single
        fallback entry built from the typed text in upper case.
    """
    text = str(query).strip() if query else ""
    if not text:
        return []

    fallback = [f"{text.upper()} | typed symbol | n/a"]
    url = "https://query1.finance.yahoo.com/v1/finance/search"
    params = {"q": text, "quotesCount": 10, "newsCount": 0}
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=10)
        resp.raise_for_status()
        hits = []
        for item in resp.json().get("quotes", []):
            sym = item.get("symbol")
            name = item.get("shortname") or item.get("longname") or ""
            exch = item.get("exchDisp") or ""
            if sym and sym.isascii():
                hits.append(f"{sym} | {name} | {exch}")
        return hits[:10] if hits else fallback
    except Exception:
        # Deliberate best-effort: any network or parsing problem falls back
        # to offering the typed text as the symbol.
        return fallback
153
+
154
def validate_tickers(symbols: List[str], years: int) -> List[str]:
    """Return the input symbols for which monthly price data was obtained.

    Symbols are upper-cased, stripped and de-duplicated (first occurrence
    wins). Prices are fetched for them together with ``MARKET_TICKER`` over
    ``years`` of history; only symbols present as a column in the fetched
    price table are kept, in their original order.
    """
    normalized = (raw.upper().strip() for raw in symbols if raw)
    candidates = list(dict.fromkeys(normalized))
    prices = fetch_prices_monthly(candidates + [MARKET_TICKER], years)
    available = prices.columns
    return [sym for sym in candidates if sym in available]
159
+
160
+
161
+ # ---------------- Moments / CAPM ----------------
162
def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
    """Fetch monthly returns for ``symbols`` plus the market on common dates.

    The symbol list is upper-cased and de-duplicated, and ``MARKET_TICKER`` is
    appended when absent. Only rows where every retained column has a value
    are kept, so all series share the same dates. Duplicate column labels are
    dropped (first one wins).
    """
    ordered = list(dict.fromkeys(sym.upper() for sym in symbols if sym))
    if MARKET_TICKER not in ordered:
        ordered.append(MARKET_TICKER)

    returns = monthly_returns(fetch_prices_monthly(ordered, years))

    # Keep only the requested symbols that actually came back with data.
    present = [name for name in ordered if name in returns.columns]
    aligned = returns[present].dropna(how="any")

    first_occurrence = ~aligned.columns.duplicated()
    return aligned.loc[:, first_occurrence]
 
171
 
172
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
173
+ R = get_aligned_monthly_returns(symbols, years)
174
+ if MARKET_TICKER not in R.columns or R.shape[0] < 3:
175
+ raise ValueError("Not enough aligned data for market / assets")
176
+
177
  rf_m = rf_ann / 12.0
178
 
179
+ m = R[MARKET_TICKER]
180
  if isinstance(m, pd.DataFrame):
181
  m = m.iloc[:, 0].squeeze()
182
 
 
189
  var_m = max(var_m, 1e-8)
190
 
191
  betas: Dict[str, float] = {}
192
+ for s in [c for c in R.columns if c != MARKET_TICKER]:
193
  ex_s = R[s] - rf_m
194
+ cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
195
+ betas[s] = cov_sm / var_m
196
+ betas[MARKET_TICKER] = 1.0
197
+
198
+ # IMPORTANT: include the market in covariance (fixes under-estimated sigma)
199
+ cov_cols = list(R.columns)
200
+ cov_m = np.cov(R[cov_cols].values.T, ddof=1)
201
+ covA = pd.DataFrame(cov_m * 12.0, index=cov_cols, columns=cov_cols)
202
+
203
+ return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
 
 
 
 
 
204
 
 
 
 
205
def capm_er(beta: float, rf_ann: float, erp_ann: float) -> float:
    """Return the CAPM expected annual return: ``rf_ann + beta * erp_ann``."""
    risk_premium = beta * erp_ann
    return float(rf_ann + risk_premium)
207
 
 
211
  rf_ann: float,
212
  erp_ann: float) -> Tuple[float, float, float]:
213
  tickers = list(weights.keys())
 
 
214
  w = np.array([weights[t] for t in tickers], dtype=float)
215
  gross = float(np.sum(np.abs(w)))
216
+ if gross <= 1e-12:
217
  return 0.0, rf_ann, 0.0
218
  w_expo = w / gross
219
  beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
220
+ mu_capm = capm_er(beta_p, rf_ann, erp_ann)
221
  cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
222
+ sigma_hist = float(max(w_expo.T @ cov @ w_expo, 0.0)) ** 0.5 # annualized
223
+ return beta_p, mu_capm, sigma_hist
224
 
225
+
226
+ # ---------------- Efficient points on the CML ----------------
227
def efficient_same_sigma_on_cml(sigma_target: float, rf: float, erp: float, sigma_mkt: float) -> float:
    """Expected return on the Capital Market Line at volatility ``sigma_target``.

    The CML point is ``rf + (sigma_target / sigma_mkt) * erp``. When the
    market volatility is (numerically) zero the ratio is undefined, so the
    risk-free rate is returned instead.
    """
    return rf if sigma_mkt <= 1e-12 else rf + (sigma_target / sigma_mkt) * erp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
def efficient_same_return_on_cml(mu_target: float, rf: float, erp: float, sigma_mkt: float) -> float:
    """Volatility on the Capital Market Line required to earn ``mu_target``.

    The market exposure is ``(mu_target - rf) / erp``; the result is its
    absolute value times ``sigma_mkt``. With a (numerically) zero equity risk
    premium no exposure can change the return, and 0.0 is returned.
    """
    return 0.0 if abs(erp) <= 1e-12 else abs((mu_target - rf) / erp) * sigma_mkt
240
+
241
+
242
+ # ---------------- Plot ----------------
243
+ def _pct(x):
244
+ arr = np.asarray(x, dtype=float)
245
+ return arr * 100.0
246
+
247
def plot_cml(
    rf_ann: float,
    erp_ann: float,
    sigma_mkt: float,
    port_beta: float,
    port_mu_capm: float,
    port_sigma_capm: float,
    sugg_mu_capm: Optional[float],
    sugg_sigma_capm: Optional[float],
) -> Image.Image:
    """Render the Capital Market Line with the key CAPM points as a PIL image.

    Args:
        rf_ann: Annual risk-free rate (fraction, e.g. 0.03).
        erp_ann: Annual market excess return over the risk-free rate.
        sigma_mkt: Annualized market volatility.
        port_beta: Portfolio beta. Accepted for interface compatibility; it
            is not used by the drawing code.
        port_mu_capm: CAPM expected return of the user's portfolio.
        port_sigma_capm: Volatility at which the user's portfolio is plotted.
        sugg_mu_capm: Expected return of the selected suggestion, or None.
        sugg_sigma_capm: Volatility of the selected suggestion, or None.

    Returns:
        The chart as a PIL image opened from an in-memory PNG. All axes are
        shown in percent.
    """
    # Explicit figure/axes objects avoid relying on pyplot's global
    # "current figure", and the try/finally guarantees the figure is released
    # even when drawing or saving fails.
    fig, ax = plt.subplots(figsize=(6.5, 4.2), dpi=120)
    try:
        # Make the x-range wide enough to show every plotted point.
        xmax = max(
            0.30,
            sigma_mkt * 2.1,
            port_sigma_capm * 1.35,
            (sugg_sigma_capm or 0) * 1.35,
        )
        xs = np.linspace(0.0, xmax, 160)
        # Slope is erp / sigma_mkt; the floor avoids division by zero.
        cml = rf_ann + (erp_ann / max(sigma_mkt, 1e-12)) * xs
        ax.plot(_pct(xs), _pct(cml), label="CML via Market", linewidth=1.8)

        # Key points
        ax.scatter([_pct(0.0)], [_pct(rf_ann)], label="Risk-free", zorder=3)
        ax.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market", zorder=3)

        # Your CAPM point
        ax.scatter([_pct(port_sigma_capm)], [_pct(port_mu_capm)], label="Your CAPM point", zorder=4)

        # Selected suggestion (if any)
        if sugg_mu_capm is not None and sugg_sigma_capm is not None:
            ax.scatter([_pct(sugg_sigma_capm)], [_pct(sugg_mu_capm)], label="Selected Suggestion", zorder=4)

        ax.set_xlabel("σ (annualized, %)")
        ax.set_ylabel("Expected return (annual, %)")
        ax.legend(loc="best")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
+ # ---------------- Synthetic dataset (universe-driven) ----------------
288
def build_synthetic_dataset(universe: List[str],
                            cov_ann: pd.DataFrame,
                            betas: Dict[str, float],
                            rf_ann: float, erp_ann: float,
                            n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
    """Generate random long-only portfolios over ``universe`` with CAPM stats.

    Each row picks a random subset of tickers (2 to 8 of them, or fewer when
    the universe is smaller), draws Dirichlet weights, and records the
    resulting portfolio statistics. The generator is seeded with a fixed
    value, so the same inputs always produce the same dataset.

    Args:
        universe: Candidate tickers; falsy entries and duplicates are ignored.
        cov_ann: Annualized covariance matrix indexed by ticker.
        betas: Beta per ticker.
        rf_ann: Annual risk-free rate.
        erp_ann: Annual equity risk premium.
        n_rows: Number of portfolios to generate.

    Returns:
        DataFrame with columns ``tickers`` and ``weights`` (comma-separated
        strings), ``beta``, ``mu_capm``, ``sigma_hist`` and ``sigma_capm``.
        It is empty, but still has these columns, when the universe is empty
        or ``n_rows`` is not positive.
    """
    columns = ["tickers", "weights", "beta", "mu_capm", "sigma_hist", "sigma_capm"]
    tickers = list(dict.fromkeys(t for t in universe if t))
    if not tickers or n_rows <= 0:
        return pd.DataFrame(columns=columns)

    rng = np.random.default_rng(12345)

    # Subset size bounds. Clamping the lower bound to the universe size keeps
    # a one-ticker universe from asking for more picks than exist.
    k_min = min(2, len(tickers))
    k_max = min(8, len(tickers))

    # Market volatility does not depend on the row, so compute it once.
    if MARKET_TICKER in cov_ann.index:
        sigma_mkt = cov_ann.loc[MARKET_TICKER, MARKET_TICKER] ** 0.5
    else:
        sigma_mkt = 0.0

    rows = []
    for _ in range(n_rows):
        k = int(rng.integers(low=k_min, high=k_max + 1))
        picks = list(rng.choice(tickers, size=k, replace=False))
        w = rng.dirichlet(np.ones(k))  # long-only exposure
        wmap = {name: float(weight) for name, weight in zip(picks, w)}
        beta_p, mu_capm, sigma_hist = portfolio_stats(wmap, cov_ann, betas, rf_ann, erp_ann)
        rows.append({
            "tickers": ",".join(picks),
            "weights": ",".join(f"{x:.6f}" for x in w),
            "beta": beta_p,
            "mu_capm": mu_capm,
            "sigma_hist": sigma_hist,
            "sigma_capm": abs(beta_p) * sigma_mkt,
        })
    return pd.DataFrame(rows, columns=columns)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
 
314
def parse_row_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
    """Expand a dataset row into a weight vector ordered like ``universe``.

    ``row["tickers"]`` and ``row["weights"]`` are comma-separated strings that
    are paired positionally. Tickers absent from the row get weight 0. The
    vector is normalized to sum to 1.

    Returns None when the row cannot be parsed or the weights that fall
    inside ``universe`` sum to (numerically) zero or less.
    """
    try:
        names = [part.strip() for part in str(row["tickers"]).split(",")]
        values = [float(part) for part in str(row["weights"]).split(",")]
        by_ticker = dict(zip(names, values))
        vec = np.array([by_ticker.get(name, 0.0) for name in universe], dtype=float)
        total = float(vec.sum())
        return None if total <= 1e-12 else vec / total
    except Exception:
        return None
325
+
326
def select_band(df: pd.DataFrame, band: str) -> pd.DataFrame:
    """Filter ``df`` to rows whose ``sigma_capm`` lies in the given risk band.

    Bands are quantile ranges of ``sigma_capm`` (bounds inclusive):
    "Low" is up to the 25th percentile, "Medium" is the 40th-60th percentile,
    and anything else is treated as "High" (75th percentile and above).
    If the band turns out empty, the three lowest-sigma rows ("Low") or the
    three highest-sigma rows (any other band) are returned instead.
    An empty input is returned unchanged.
    """
    if df.empty:
        return df

    sigma = df["sigma_capm"]
    if band == "Low":
        lower, upper = -1.0, sigma.quantile(0.25)
    elif band == "Medium":
        lower, upper = sigma.quantile(0.40), sigma.quantile(0.60)
    else:  # High
        lower, upper = sigma.quantile(0.75), float("inf")

    in_band = df[sigma.between(lower, upper)].copy()
    if not in_band.empty:
        return in_band

    if band == "Low":
        return df.nsmallest(3, "sigma_capm")
    return df.nlargest(3, "sigma_capm")
339
+
340
def top3_by_return_in_band(df: pd.DataFrame, band: str) -> pd.DataFrame:
    """Return up to three rows of the risk band with the highest ``mu_capm``.

    The result is sorted by ``mu_capm`` descending and re-indexed from 0.
    """
    candidates = select_band(df, band)
    ranked = candidates.sort_values("mu_capm", ascending=False)
    return ranked.head(3).reset_index(drop=True)
343
+
344
+
345
+ # ---------------- Embeddings rerank (optional) ----------------
346
def rerank_with_embeddings(df3: pd.DataFrame, band: str) -> pd.DataFrame:
    """Reorder candidate rows by embedding similarity to a risk-band prompt.

    Each row is rendered as a short text description and encoded together
    with a prompt describing the requested band; rows are then sorted by
    descending dot product with the prompt vector. The input is returned
    untouched when no embedding model is available, when there are no rows,
    or when encoding fails.
    """
    encoder = get_embed_model()
    if encoder is None or df3.empty:
        return df3

    band_prompts = {
        "Low": "low risk diversified ETF mix, low beta, low volatility",
        "Medium": "balanced risk ETF mix, moderate beta, medium volatility",
        "High": "high risk growth ETF mix, higher beta, higher volatility",
    }
    query_text = band_prompts.get(band, "balanced portfolio")

    descriptions = [
        f"tickers={r['tickers']} weights={r['weights']} "
        f"beta={r['beta']:.3f} mu_capm={r['mu_capm']:.3f} sigma_capm={r['sigma_capm']:.3f}"
        for _, r in df3.iterrows()
    ]

    try:
        vectors = encoder.encode([query_text] + descriptions, normalize_embeddings=True)
        # Row 0 is the prompt; the remaining rows are the candidates.
        scores = (vectors[0:1] @ vectors[1:].T).ravel()
        best_first = np.argsort(-scores)
        return df3.iloc[best_first].reset_index(drop=True)
    except Exception:
        # Reranking is optional; fall back to the incoming order.
        return df3
373
+
374
+
375
+ # ---------------- Gradio helpers ----------------
376
def empty_positions_df():
    """Return an empty table with the columns of the positions grid."""
    headers = ["ticker", "amount_usd", "weight_exposure", "beta"]
    return pd.DataFrame(columns=headers)
378
+
379
def empty_suggestion_df():
    """Return an empty table with the columns of the suggestion grid."""
    headers = ["ticker", "weight_%", "amount_$"]
    return pd.DataFrame(columns=headers)
381
+
382
# Module-level state read (and partly reassigned) by the Gradio callbacks.
# UNIVERSE starts as just the market ticker; compute() replaces it with the
# validated tickers plus the market.
UNIVERSE: List[str] = [MARKET_TICKER]
# Investment horizon in years; used below to choose the risk-free series.
HORIZON_YEARS = DEFAULT_HORIZON_YEARS
# FRED series code matching the horizon, and its latest annual yield.
# NOTE(review): fetch_fred_yield_annual presumably performs a network request,
# which would run at import time here -- confirm.
RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
RF_ANN = fetch_fred_yield_annual(RF_CODE)
386
+
387
def search_cb(q: str):
    """Gradio callback: search for ``q`` and refresh the matches dropdown.

    Returns a status note and a ``gr.update`` that replaces the dropdown's
    choices with the search results and clears its current selection.
    """
    options = yahoo_search(q)
    if options:
        note = "Select a row and click 'Add selected to portfolio'."
    else:
        note = "No matches."
    return note, gr.update(choices=options, value=None)
391
 
392
  def add_symbol(selection: str, table: pd.DataFrame):
393
+ if not selection or "|" not in selection:
394
+ return table, "Pick a symbol from Matches first."
395
  symbol = selection.split("|")[0].strip().upper()
396
  current = [] if table is None or len(table) == 0 else [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
397
  tickers = current if symbol in current else current + [symbol]
 
408
  if len(new_table) > MAX_TICKERS:
409
  new_table = new_table.iloc[:MAX_TICKERS]
410
  msg = f"Reached max of {MAX_TICKERS}"
411
+ return new_table, msg
412
 
413
+ def lock_table(tb: pd.DataFrame):
414
  if tb is None or len(tb) == 0:
415
  return pd.DataFrame(columns=["ticker", "amount_usd"])
416
  tickers = [str(x).upper() for x in tb["ticker"].tolist()]
 
430
  RF_ANN = rf
431
  return f"Risk-free series {code}. Latest annual rate {rf:.2%}."
432
 
433
def to_pct_str(x):
    """Format a fraction as a percentage string with two decimals."""
    percent = x * 100
    return f"{percent:.2f}%"
434
+
435
def compute(
    years_lookback: int,
    table: pd.DataFrame,
    risk_band: str,
    use_embeddings: bool,
    pick_idx: int
):
    """Gradio callback: analyse the portfolio and produce a suggestion.

    Steps: clean the input table, validate tickers against price data,
    estimate betas/covariances, compute the portfolio's CAPM statistics,
    generate a synthetic dataset of candidate portfolios (also saved as CSV),
    pick one of the top candidates in the requested risk band, and render the
    CML chart and summary.

    Args:
        years_lookback: Years of monthly history used for the estimates.
        table: Positions with columns ``ticker`` and ``amount_usd``; may be
            None.
        risk_band: "Low", "Medium" or "High".
        use_embeddings: Whether to rerank the candidates with embeddings.
        pick_idx: 1-based index of the candidate to display (clamped to the
            candidates available, at most 3).

    Returns:
        A 6-tuple ``(image, info_markdown, universe_message, positions_table,
        suggestion_table, csv_path)``. On any input problem the image and CSV
        path are None, the tables are empty, and the two text fields explain
        what went wrong.
    """
    global UNIVERSE

    def _fail(message: str, universe_note: str):
        # Same 6-slot shape as the success return so Gradio outputs line up.
        return None, message, universe_note, empty_positions_df(), empty_suggestion_df(), None

    # --- inputs
    # A DataFrame has no truth value (``table or ...`` raises ValueError), so
    # the missing-table case must be an explicit None check.
    if table is None:
        table = pd.DataFrame(columns=["ticker", "amount_usd"])
    df = table.dropna().copy()
    df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
    df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)

    symbols = [t for t in df["ticker"].tolist() if t]
    if not symbols:
        return _fail("Add at least one ticker.", "Universe empty.")

    years = int(years_lookback)
    symbols = validate_tickers(symbols, years)
    if not symbols:
        return _fail("Could not validate any tickers.", "Universe invalid.")

    UNIVERSE = sorted(set(symbols) | {MARKET_TICKER})[:MAX_TICKERS]
    uni_msg = f"Universe set to: {', '.join(UNIVERSE)}"

    df = df[df["ticker"].isin(symbols)].copy()
    amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
    rf_ann = RF_ANN

    # --- moments & CAPM stats
    try:
        moms = estimate_all_moments_aligned(symbols, years, rf_ann)
    except ValueError as exc:
        # Raised when there is too little overlapping history; report it in
        # the UI instead of failing the callback.
        return _fail(f"Could not estimate betas and covariances: {exc}", uni_msg)
    betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]

    gross = sum(abs(v) for v in amounts.values())
    if gross <= 1e-12:
        return _fail("All amounts are zero.", "Universe ok.")
    weights = {k: v / gross for k, v in amounts.items()}

    beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
    sigma_capm = abs(beta_p) * sigma_mkt

    # --- dataset & suggestions
    synth = build_synthetic_dataset(UNIVERSE, covA, betas, rf_ann, erp_ann, n_rows=SYNTH_ROWS)
    # save CSV for the grader / assignment
    csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
    ensure_dir(os.path.dirname(csv_path))
    synth.to_csv(csv_path, index=False)

    top3 = top3_by_return_in_band(synth, risk_band)
    if use_embeddings:
        top3 = rerank_with_embeddings(top3, risk_band)

    # guard: fall back to the best rows overall if the band produced nothing
    if top3.empty:
        top3 = synth.sort_values("mu_capm", ascending=False).head(3).reset_index(drop=True)
    if top3.empty:
        return _fail("No suggestions could be generated.", uni_msg)

    # pick from carousel (1..3), clamped to the rows that actually exist
    n_picks = len(top3)
    idx = min(max(1, min(3, int(pick_idx))) - 1, n_picks - 1)
    row = top3.iloc[idx]

    # selected suggestion stats (CAPM)
    sugg_mu = float(row["mu_capm"])
    sugg_sigma = float(row.get("sigma_capm", abs(row["beta"]) * sigma_mkt))

    # Build holdings table (% and $) for selected suggestion
    ts = [t.strip() for t in str(row["tickers"]).split(",")]
    ws = [max(0.0, float(x)) for x in str(row["weights"]).split(",")]  # long-only
    # Normalize after clamping so the displayed weights sum to 100%.
    wsum = sum(ws)
    if wsum <= 1e-12:
        wsum = 1.0
    ws = [w / wsum for w in ws]
    budget = gross if gross > 0 else 1.0
    hold_rows = [
        {
            "ticker": t,
            "weight_%": round(w * 100.0, 2),
            "amount_$": round(w * budget, 0),
        }
        for t, w in zip(ts, ws)
    ]
    sugg_table = pd.DataFrame(hold_rows, columns=["ticker", "weight_%", "amount_$"])

    # positions table for current portfolio
    pos_rows = [
        {
            "ticker": t,
            "amount_usd": amounts.get(t, 0.0),
            "weight_exposure": weights.get(t, 0.0),
            "beta": 1.0 if t == MARKET_TICKER else betas.get(t, np.nan),
        }
        for t in symbols
    ]
    pos_table = pd.DataFrame(pos_rows, columns=["ticker", "amount_usd", "weight_exposure", "beta"])

    # --- plot
    img = plot_cml(
        rf_ann, erp_ann, sigma_mkt,
        beta_p, mu_capm, sigma_capm,
        sugg_mu, sugg_sigma
    )

    # --- info markdown
    info_lines = [
        "### Inputs",
        f"- Lookback years {years}",
        f"- Horizon years {int(round(HORIZON_YEARS))}",
        f"- Risk-free {to_pct_str(rf_ann)} from {RF_CODE}",
        f"- Market ERP {to_pct_str(erp_ann)}",
        f"- Market σ {to_pct_str(sigma_mkt)}",
        "",
        "### Your portfolio (CAPM)",
        f"- Beta {beta_p:.2f}",
        f"- Expected return (CAPM / SML) {to_pct_str(mu_capm)}",
        f"- σ on CML for your beta (|β|×σ_mkt) {to_pct_str(sigma_capm)}",
        "",
        "### Dataset-based suggestion (carousel)",
        f"- Risk band **{risk_band}**, showing **Pick #{idx+1} of {n_picks}**",
        f"- Suggested CAPM return {to_pct_str(sugg_mu)}",
        f"- Suggested CAPM σ {to_pct_str(sugg_sigma)}",
        "",
        "_Note: points are CAPM expectations on the CML (not historical means)._",
    ]
    info = "\n".join(info_lines)

    return img, info, uni_msg, pos_table, sugg_table, csv_path
554
+
555
+
556
+ # ---------------- UI ----------------
557
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
558
  gr.Markdown(
559
  "## Efficient Portfolio Advisor\n"
560
+ "Search symbols, enter **dollar amounts**, set horizon. Returns use Yahoo Finance monthly data; risk-free from FRED. "
561
+ "Plot shows **CAPM point on the CML** plus selected suggestion."
 
562
  )
563
 
564
  with gr.Row():
565
  with gr.Column(scale=1):
566
  q = gr.Textbox(label="Search symbol")
567
  search_note = gr.Markdown()
568
+ matches = gr.Dropdown(choices=[], label="Matches")
569
  search_btn = gr.Button("Search")
570
  add_btn = gr.Button("Add selected to portfolio")
571
 
 
577
  col_count=(2, "fixed")
578
  )
579
 
580
+ horizon = gr.Number(label="Horizon in years (1–100)", value=DEFAULT_HORIZON_YEARS, precision=0)
581
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for betas & covariances")
582
 
583
+ gr.Markdown("### Suggestions")
584
+ risk_band = gr.Radio(choices=["Low", "Medium", "High"], value="Medium", label="Risk tolerance")
585
+ use_emb = gr.Checkbox(label="Use finance embeddings to refine picks", value=False)
586
+ pick_idx = gr.Slider(1, 3, value=1, step=1, label="Suggestion (carousel)")
587
+
588
+ run_btn = gr.Button("Compute (build dataset & suggest)")
589
 
590
  with gr.Column(scale=1):
591
  plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
592
+ summary = gr.Markdown(label="Inputs & Results")
593
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
594
 
595
  positions = gr.Dataframe(
596
  label="Computed positions",
597
+ headers=["ticker", "amount_usd", "weight_exposure", "beta"],
598
  datatype=["str", "number", "number", "number"],
599
+ col_count=(4, "fixed"),
600
  value=empty_positions_df(),
601
  interactive=False
602
  )
603
 
604
+ sugg_table = gr.Dataframe(
605
+ label="Selected suggestion (carousel) — holdings shown in % and $",
606
+ headers=["ticker", "weight_%", "amount_$"],
 
 
 
 
 
607
  datatype=["str", "number", "number"],
608
+ col_count=(3, "fixed"),
609
+ value=empty_suggestion_df(),
610
  interactive=False
611
  )
612
 
613
+ dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
 
 
 
614
 
615
+ # wiring
616
+ search_btn.click(fn=search_cb, inputs=q, outputs=[search_note, matches])
617
+ add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
618
+ table.change(fn=lock_table, inputs=table, outputs=table)
619
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
620
 
621
+ # main compute
622
  run_btn.click(
623
  fn=compute,
624
+ inputs=[lookback, table, risk_band, use_emb, pick_idx],
625
+ outputs=[plot, summary, universe_msg, positions, sugg_table, dl]
 
 
 
 
 
 
 
 
 
 
 
626
  )
627
 
628
  if __name__ == "__main__":