Tulitula committed on
Commit
42f56cc
·
verified ·
1 Parent(s): fbe9e4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +278 -318
app.py CHANGED
@@ -1,72 +1,42 @@
1
- # app.py
2
- # Efficient Portfolio Advisor CAPM on CML + Low/Medium/High suggestion carousel
3
- # - Search tickers, enter $ amounts (negatives allowed), pick horizon
4
- # - Plot shows CAPM point on the CML (not historical)
5
- # - Suggestions are sampled from a 1,000-row dataset generated from your universe
6
- # - Carousel lets you flip between 3 suggestions in the chosen risk band
7
- # - Optional: rerank suggestions with finance embeddings (FinLang) to be on-theme
8
-
9
- import io
10
- import os
11
- import math
12
- import json
13
- import time
14
- import warnings
15
- from typing import Dict, List, Optional, Tuple
16
-
17
  warnings.filterwarnings("ignore")
18
 
 
 
19
  import numpy as np
20
  import pandas as pd
21
  import matplotlib.pyplot as plt
22
  from PIL import Image
23
-
24
- import gradio as gr
25
  import requests
26
  import yfinance as yf
 
27
 
28
- # Optional embeddings (won't break if missing GPU; loads once)
29
- _EMBED_MODEL = None
30
- def get_embed_model():
31
- global _EMBED_MODEL
32
- if _EMBED_MODEL is None:
33
- try:
34
- from sentence_transformers import SentenceTransformer
35
- _EMBED_MODEL = SentenceTransformer("FinLang/finance-embeddings-investopedia")
36
- except Exception:
37
- _EMBED_MODEL = None
38
- return _EMBED_MODEL
39
-
40
-
41
- # ---------------- Configuration ----------------
42
  DATA_DIR = "data"
43
  os.makedirs(DATA_DIR, exist_ok=True)
44
 
45
- MARKET_TICKER = "VOO" # proxy for market
46
  MAX_TICKERS = 30
47
  DEFAULT_LOOKBACK_YEARS = 10
48
- DEFAULT_HORIZON_YEARS = 10
49
- SYNTH_ROWS = 1000
50
-
51
- FRED_MAP = [
52
- (1, "DGS1"),
53
- (2, "DGS2"),
54
- (3, "DGS3"),
55
- (5, "DGS5"),
56
- (7, "DGS7"),
57
- (10, "DGS10"),
58
- (20, "DGS20"),
59
- (30, "DGS30"),
60
- (100,"DGS30"),
61
- ]
62
-
63
- def ensure_dir(p): os.makedirs(p, exist_ok=True)
64
 
 
 
 
 
 
 
65
  def fred_series_for_horizon(years: float) -> str:
 
66
  y = max(1.0, min(100.0, float(years)))
67
- for cutoff, code in FRED_MAP:
68
- if y <= cutoff:
69
- return code
 
 
 
70
  return "DGS30"
71
 
72
  def fetch_fred_yield_annual(code: str) -> float:
@@ -81,52 +51,46 @@ def fetch_fred_yield_annual(code: str) -> float:
81
  return 0.03
82
 
83
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
84
- tickers = list(dict.fromkeys([t.upper().strip() for t in tickers if t]))
85
- if not tickers:
86
- return pd.DataFrame()
87
- start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
88
- end = pd.Timestamp.today(tz="UTC")
89
 
90
- raw = yf.download(
91
  tickers,
92
- start=start.date(),
93
- end=end.date(),
94
  interval="1mo",
95
  auto_adjust=True,
 
96
  progress=False,
97
- group_by="column"
 
98
  )
99
- if raw is None or len(raw) == 0:
100
- return pd.DataFrame()
101
-
102
- # Handle single or multi-index columns
103
- if isinstance(raw.columns, pd.MultiIndex):
104
- # level 0: OHLCV, level 1: ticker
105
- if "Close" in raw.columns.levels[0]:
106
- closes = raw.xs("Close", axis=1, level=0)
 
 
 
107
  else:
108
- # fallback: try Adj Close else last level
109
- level0 = raw.columns.levels[0].tolist()
110
- col0 = "Adj Close" if "Adj Close" in level0 else level0[0]
111
- closes = raw.xs(col0, axis=1, level=0)
112
  else:
113
- if "Close" in raw.columns:
114
- closes = raw[["Close"]]
115
- elif "Adj Close" in raw.columns:
116
- closes = raw[["Adj Close"]].rename(columns={"Adj Close":"Close"})
117
- else:
118
- closes = raw
119
-
120
- if isinstance(closes, pd.Series):
121
- closes = closes.to_frame()
122
 
123
- # Normalize columns to uppercase tickers
124
- closes.columns = [str(c).upper() for c in closes.columns]
125
- closes = closes.dropna(how="all").fillna(method="ffill")
126
- return closes
127
 
128
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
129
- return prices.pct_change().dropna(how="all")
130
 
131
  def yahoo_search(query: str):
132
  if not query or not str(query).strip():
@@ -152,28 +116,28 @@ def yahoo_search(query: str):
152
  return [f"{query.strip().upper()} | typed symbol | n/a"]
153
 
154
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
155
- base = [s for s in dict.fromkeys([t.upper().strip() for t in symbols if t])]
156
  px = fetch_prices_monthly(base + [MARKET_TICKER], years)
157
- ok = [t for t in base if t in px.columns]
 
 
 
158
  return ok
159
 
160
-
161
- # ---------------- Moments / CAPM ----------------
162
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
163
- uniq = [c for c in dict.fromkeys([s.upper() for s in symbols if s])]
164
- if MARKET_TICKER not in uniq:
165
- uniq.append(MARKET_TICKER)
166
- px = fetch_prices_monthly(uniq, years)
167
  rets = monthly_returns(px)
168
- cols = [c for c in uniq if c in rets.columns]
169
  R = rets[cols].dropna(how="any")
170
  return R.loc[:, ~R.columns.duplicated()]
171
 
172
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
173
  R = get_aligned_monthly_returns(symbols, years)
174
- if MARKET_TICKER not in R.columns or R.shape[0] < 3:
175
- raise ValueError("Not enough aligned data for market / assets")
176
-
177
  rf_m = rf_ann / 12.0
178
 
179
  m = R[MARKET_TICKER]
@@ -186,19 +150,19 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
186
 
187
  ex_m = m - rf_m
188
  var_m = float(np.var(ex_m.values, ddof=1))
189
- var_m = max(var_m, 1e-8)
190
 
191
  betas: Dict[str, float] = {}
192
  for s in [c for c in R.columns if c != MARKET_TICKER]:
193
  ex_s = R[s] - rf_m
194
  cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
195
  betas[s] = cov_sm / var_m
 
196
  betas[MARKET_TICKER] = 1.0
197
 
198
- # IMPORTANT: include the market in covariance (fixes under-estimated sigma)
199
- cov_cols = list(R.columns)
200
- cov_m = np.cov(R[cov_cols].values.T, ddof=1)
201
- covA = pd.DataFrame(cov_m * 12.0, index=cov_cols, columns=cov_cols)
202
 
203
  return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
204
 
@@ -219,58 +183,41 @@ def portfolio_stats(weights: Dict[str, float],
219
  beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
220
  mu_capm = capm_er(beta_p, rf_ann, erp_ann)
221
  cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
222
- sigma_hist = float(max(w_expo.T @ cov @ w_expo, 0.0)) ** 0.5 # annualized
223
  return beta_p, mu_capm, sigma_hist
224
 
225
-
226
- # ---------------- Efficient points on the CML ----------------
227
- def efficient_same_sigma_on_cml(sigma_target: float, rf: float, erp: float, sigma_mkt: float) -> float:
228
- # Expected return on CML at a given sigma
229
  if sigma_mkt <= 1e-12:
230
- return rf
231
  a = sigma_target / sigma_mkt
232
- return rf + a * erp
233
-
234
- def efficient_same_return_on_cml(mu_target: float, rf: float, erp: float, sigma_mkt: float) -> float:
235
- # Sigma on CML needed to hit a target return
236
- if abs(erp) <= 1e-12:
237
- return 0.0
238
- a = (mu_target - rf) / erp
239
- return abs(a) * sigma_mkt
240
 
 
 
 
 
 
241
 
242
- # ---------------- Plot ----------------
243
  def _pct(x):
244
- arr = np.asarray(x, dtype=float)
245
- return arr * 100.0
246
-
247
- def plot_cml(
248
- rf_ann: float,
249
- erp_ann: float,
250
- sigma_mkt: float,
251
- port_beta: float,
252
- port_mu_capm: float,
253
- port_sigma_capm: float,
254
- sugg_mu_capm: Optional[float],
255
- sugg_sigma_capm: Optional[float],
256
- ) -> Image.Image:
257
- fig = plt.figure(figsize=(6.5, 4.2), dpi=120)
258
-
259
- xmax = max(0.30, sigma_mkt * 2.1, port_sigma_capm * 1.35, (sugg_sigma_capm or 0) * 1.35)
260
- xs = np.linspace(0.0, xmax, 160)
261
- cml = rf_ann + (erp_ann / max(sigma_mkt, 1e-12)) * xs
262
- plt.plot(_pct(xs), _pct(cml), label="CML via Market", linewidth=1.8)
263
 
264
- # key points
265
- plt.scatter([_pct(0.0)], [_pct(rf_ann)], label="Risk-free", zorder=3)
266
- plt.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market", zorder=3)
267
 
268
- # Your CAPM point
269
- plt.scatter([_pct(port_sigma_capm)], [_pct(port_mu_capm)], label="Your CAPM point", zorder=4)
 
 
 
270
 
271
- # Selected suggestion (if any)
272
- if sugg_mu_capm is not None and sugg_sigma_capm is not None:
273
- plt.scatter([_pct(sugg_sigma_capm)], [_pct(sugg_mu_capm)], label="Selected Suggestion", zorder=4)
274
 
275
  plt.xlabel("σ (annualized, %)")
276
  plt.ylabel("Expected return (annual, %)")
@@ -283,24 +230,33 @@ def plot_cml(
283
  buf.seek(0)
284
  return Image.open(buf)
285
 
286
-
287
- # ---------------- Synthetic dataset (universe-driven) ----------------
288
  def build_synthetic_dataset(universe: List[str],
289
- cov_ann: pd.DataFrame,
290
  betas: Dict[str, float],
291
- rf_ann: float, erp_ann: float,
 
 
292
  n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
293
  rng = np.random.default_rng(12345)
 
 
 
 
294
  rows = []
295
- tickers = list(dict.fromkeys([t for t in universe if t]))
296
  for i in range(n_rows):
297
- k = int(rng.integers(low=max(2, min(2, len(tickers))), high=min(8, len(tickers)) + 1))
298
- picks = list(rng.choice(tickers, size=k, replace=False))
299
- w = rng.dirichlet(np.ones(k)) # long-only exposure
300
- # stats
301
- wmap = {picks[j]: float(w[j]) for j in range(k)}
302
- beta_p, mu_capm, sigma_hist = portfolio_stats(wmap, cov_ann, betas, rf_ann, erp_ann)
303
- sigma_capm = abs(beta_p) * (cov_ann.loc[MARKET_TICKER, MARKET_TICKER] ** 0.5) if MARKET_TICKER in cov_ann.index else 0.0
 
 
 
 
 
304
  rows.append({
305
  "tickers": ",".join(picks),
306
  "weights": ",".join(f"{x:.6f}" for x in w),
@@ -311,107 +267,101 @@ def build_synthetic_dataset(universe: List[str],
311
  })
312
  return pd.DataFrame(rows)
313
 
314
- def parse_row_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
315
- try:
316
- ts = [t.strip() for t in str(row["tickers"]).split(",")]
317
- ws = [float(x) for x in str(row["weights"]).split(",")]
318
- mp = {ts[i]: ws[i] for i in range(min(len(ts), len(ws)))}
319
- v = np.array([mp.get(t, 0.0) for t in universe], dtype=float)
320
- s = float(v.sum())
321
- if s <= 1e-12: return None
322
- return v / s
323
- except Exception:
324
- return None
325
-
326
- def select_band(df: pd.DataFrame, band: str) -> pd.DataFrame:
327
- if df.empty: return df
328
- q = df["sigma_capm"].quantile
329
- if band == "Low":
330
- lo, hi = -1.0, q(0.25)
331
- elif band == "Medium":
332
- lo, hi = q(0.40), q(0.60)
333
- else: # High
334
- lo, hi = q(0.75), float("inf")
335
- cut = df[(df["sigma_capm"] >= lo) & (df["sigma_capm"] <= hi)].copy()
336
- if cut.empty:
337
- return df.nsmallest(3, "sigma_capm") if band == "Low" else df.nlargest(3, "sigma_capm")
338
- return cut
339
-
340
- def top3_by_return_in_band(df: pd.DataFrame, band: str) -> pd.DataFrame:
341
- band_df = select_band(df, band)
342
- return band_df.sort_values("mu_capm", ascending=False).head(3).reset_index(drop=True)
343
-
344
-
345
- # ---------------- Embeddings rerank (optional) ----------------
346
- def rerank_with_embeddings(df3: pd.DataFrame, band: str) -> pd.DataFrame:
347
- model = get_embed_model()
348
- if model is None or df3.empty:
349
- return df3
350
-
351
- prompts = {
352
- "Low" : "low risk diversified ETF mix, low beta, low volatility",
353
- "Medium": "balanced risk ETF mix, moderate beta, medium volatility",
354
- "High" : "high risk growth ETF mix, higher beta, higher volatility"
355
- }
356
- q = prompts.get(band, "balanced portfolio")
357
-
358
- docs = []
359
- for _, r in df3.iterrows():
360
- docs.append(
361
- f"tickers={r['tickers']} weights={r['weights']} "
362
- f"beta={r['beta']:.3f} mu_capm={r['mu_capm']:.3f} sigma_capm={r['sigma_capm']:.3f}"
363
- )
364
  try:
365
- E = model.encode([q] + docs, normalize_embeddings=True)
366
- qv = E[0:1]
367
- dv = E[1:]
368
- sims = (qv @ dv.T).ravel()
369
- order = np.argsort(-sims)
370
- return df3.iloc[order].reset_index(drop=True)
371
- except Exception:
372
- return df3
 
 
 
 
 
 
373
 
 
 
 
 
 
 
 
 
374
 
375
- # ---------------- Gradio helpers ----------------
376
  def empty_positions_df():
377
  return pd.DataFrame(columns=["ticker", "amount_usd", "weight_exposure", "beta"])
378
 
379
  def empty_suggestion_df():
380
  return pd.DataFrame(columns=["ticker", "weight_%", "amount_$"])
381
 
382
- UNIVERSE: List[str] = [MARKET_TICKER]
383
- HORIZON_YEARS = DEFAULT_HORIZON_YEARS
384
- RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
385
- RF_ANN = fetch_fred_yield_annual(RF_CODE)
 
 
 
 
 
386
 
387
- def search_cb(q: str):
388
  opts = yahoo_search(q)
389
- note = "Select a row and click 'Add selected to portfolio'." if opts else "No matches."
390
  return note, gr.update(choices=opts, value=None)
391
 
392
- def add_symbol(selection: str, table: pd.DataFrame):
393
- if not selection or "|" not in selection:
394
- return table, "Pick a symbol from Matches first."
395
  symbol = selection.split("|")[0].strip().upper()
396
- current = [] if table is None or len(table) == 0 else [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
 
 
 
397
  tickers = current if symbol in current else current + [symbol]
 
398
  val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
399
  tickers = [t for t in tickers if t in val]
 
400
  amt_map = {}
401
- if table is not None and len(table) > 0:
402
  for _, r in table.iterrows():
403
  t = str(r.get("ticker", "")).upper()
404
  if t in tickers:
405
  amt_map[t] = float(pd.to_numeric(r.get("amount_usd", 0.0), errors="coerce") or 0.0)
 
406
  new_table = pd.DataFrame({"ticker": tickers, "amount_usd": [amt_map.get(t, 0.0) for t in tickers]})
407
- msg = f"Added {symbol}" if symbol in tickers else f"{symbol} not valid"
408
  if len(new_table) > MAX_TICKERS:
409
  new_table = new_table.iloc[:MAX_TICKERS]
410
- msg = f"Reached max of {MAX_TICKERS}"
411
- return new_table, msg
412
 
413
- def lock_table(tb: pd.DataFrame):
414
- if tb is None or len(tb) == 0:
415
  return pd.DataFrame(columns=["ticker", "amount_usd"])
416
  tickers = [str(x).upper() for x in tb["ticker"].tolist()]
417
  amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
@@ -420,27 +370,24 @@ def lock_table(tb: pd.DataFrame):
420
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
421
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
422
 
423
- def set_horizon(years: float):
424
- y = max(1.0, min(100.0, float(years)))
425
- code = fred_series_for_horizon(y)
426
- rf = fetch_fred_yield_annual(code)
427
- global HORIZON_YEARS, RF_CODE, RF_ANN
428
- HORIZON_YEARS = y
429
- RF_CODE = code
430
- RF_ANN = rf
431
- return f"Risk-free series {code}. Latest annual rate {rf:.2%}."
432
-
433
- def to_pct_str(x): return f"{x*100:.2f}%"
434
 
435
  def compute(
436
  years_lookback: int,
437
- table: pd.DataFrame,
438
  risk_band: str,
439
  use_embeddings: bool,
440
  pick_idx: int
441
  ):
442
- # --- inputs
443
- df = (table or pd.DataFrame(columns=["ticker","amount_usd"])).dropna()
 
 
 
 
 
 
444
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
445
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
446
 
@@ -459,106 +406,103 @@ def compute(
459
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
460
  rf_ann = RF_ANN
461
 
462
- # --- moments & CAPM stats
463
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
464
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
465
 
 
466
  gross = sum(abs(v) for v in amounts.values())
467
  if gross <= 1e-12:
468
  return None, "All amounts are zero.", "Universe ok.", empty_positions_df(), empty_suggestion_df(), None
469
  weights = {k: v / gross for k, v in amounts.items()}
470
 
 
471
  beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
472
  sigma_capm = abs(beta_p) * sigma_mkt
473
 
474
- # --- dataset & suggestions
475
- synth = build_synthetic_dataset(UNIVERSE, covA, betas, rf_ann, erp_ann, n_rows=SYNTH_ROWS)
476
- # save CSV for the grader / assignment
 
 
 
477
  csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
478
- ensure_dir(os.path.dirname(csv_path))
479
  synth.to_csv(csv_path, index=False)
480
 
481
- top3 = top3_by_return_in_band(synth, risk_band)
482
  if use_embeddings:
483
  top3 = rerank_with_embeddings(top3, risk_band)
484
-
485
- # guard
486
  if top3.empty:
487
  top3 = synth.sort_values("mu_capm", ascending=False).head(3).reset_index(drop=True)
 
488
 
489
- # pick from carousel (1..3)
490
  idx = max(1, min(3, int(pick_idx))) - 1
491
  row = top3.iloc[idx]
492
 
493
- # selected suggestion stats (CAPM)
494
  sugg_mu = float(row["mu_capm"])
495
- sugg_sigma = float(row.get("sigma_capm", abs(row["beta"]) * sigma_mkt))
496
 
497
- # Build holdings table (% and $) for selected suggestion
498
  ts = [t.strip() for t in str(row["tickers"]).split(",")]
499
  ws = [float(x) for x in str(row["weights"]).split(",")]
500
- wsum = sum(ws) if ws else 1.0
501
- ws = [max(0.0, w) / wsum for w in ws] # long-only normalized
502
  budget = gross if gross > 0 else 1.0
503
- hold_rows = []
504
- for t, w in zip(ts, ws):
505
- hold_rows.append({
506
- "ticker": t,
507
- "weight_%": round(w * 100.0, 2),
508
- "amount_$": round(w * budget, 0)
509
- })
510
- sugg_table = pd.DataFrame(hold_rows, columns=["ticker", "weight_%", "amount_$"])
511
 
512
- # positions table for current portfolio
513
- pos_rows = []
514
- for t in symbols:
515
- pos_rows.append({
516
  "ticker": t,
517
  "amount_usd": amounts.get(t, 0.0),
518
  "weight_exposure": weights.get(t, 0.0),
519
  "beta": 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
520
- })
521
- pos_table = pd.DataFrame(pos_rows, columns=["ticker", "amount_usd", "weight_exposure", "beta"])
522
-
523
- # --- plot
524
- img = plot_cml(
525
- rf_ann, erp_ann, sigma_mkt,
526
- beta_p, mu_capm, sigma_capm,
527
- sugg_mu, sugg_sigma
528
  )
529
 
530
- # --- info markdown
531
- info_lines = []
532
- info_lines.append("### Inputs")
533
- info_lines.append(f"- Lookback years {years_lookback}")
534
- info_lines.append(f"- Horizon years {int(round(HORIZON_YEARS))}")
535
- info_lines.append(f"- Risk-free {to_pct_str(rf_ann)} from {RF_CODE}")
536
- info_lines.append(f"- Market ERP {to_pct_str(erp_ann)}")
537
- info_lines.append(f"- Market σ {to_pct_str(sigma_mkt)}")
538
- info_lines.append("")
539
- info_lines.append("### Your portfolio (CAPM)")
540
- info_lines.append(f"- Beta {beta_p:.2f}")
541
- info_lines.append(f"- Expected return (CAPM / SML) {to_pct_str(mu_capm)}")
542
- info_lines.append(f"- on CML for your beta (|β|×σ_mkt) {to_pct_str(sigma_capm)}")
543
- info_lines.append("")
544
- info_lines.append("### Dataset-based suggestion (carousel)")
545
- info_lines.append(f"- Risk band **{risk_band}**, showing **Pick #{idx+1} of 3**")
546
- info_lines.append(f"- Suggested CAPM return {to_pct_str(sugg_mu)}")
547
- info_lines.append(f"- Suggested CAPM σ {to_pct_str(sugg_sigma)}")
548
- info_lines.append("")
549
- info_lines.append("_Note: points are CAPM expectations on the CML (not historical means)._" )
550
- info = "\n".join(info_lines)
 
 
 
 
 
551
 
552
  uni_msg = f"Universe set to: {', '.join(UNIVERSE)}"
553
- return img, info, uni_msg, pos_table, sugg_table, csv_path
554
 
 
 
 
555
 
556
- # ---------------- UI ----------------
557
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
558
  gr.Markdown(
559
  "## Efficient Portfolio Advisor\n"
560
  "Search symbols, enter **dollar amounts**, set horizon. Returns use Yahoo Finance monthly data; risk-free from FRED. "
561
- "Plot shows **CAPM point on the CML** plus selected suggestion."
562
  )
563
 
564
  with gr.Row():
@@ -577,21 +521,23 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
577
  col_count=(2, "fixed")
578
  )
579
 
580
- horizon = gr.Number(label="Horizon in years (1–100)", value=DEFAULT_HORIZON_YEARS, precision=0)
581
- lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for betas & covariances")
582
 
583
  gr.Markdown("### Suggestions")
584
- risk_band = gr.Radio(choices=["Low", "Medium", "High"], value="Medium", label="Risk tolerance")
585
- use_emb = gr.Checkbox(label="Use finance embeddings to refine picks", value=False)
586
- pick_idx = gr.Slider(1, 3, value=1, step=1, label="Suggestion (carousel)")
587
 
588
- run_btn = gr.Button("Compute (build dataset & suggest)")
 
 
 
589
 
 
590
  with gr.Column(scale=1):
591
  plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
592
  summary = gr.Markdown(label="Inputs & Results")
593
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
594
-
595
  positions = gr.Dataframe(
596
  label="Computed positions",
597
  headers=["ticker", "amount_usd", "weight_exposure", "beta"],
@@ -600,7 +546,6 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
600
  value=empty_positions_df(),
601
  interactive=False
602
  )
603
-
604
  sugg_table = gr.Dataframe(
605
  label="Selected suggestion (carousel) — holdings shown in % and $",
606
  headers=["ticker", "weight_%", "amount_$"],
@@ -609,21 +554,36 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
609
  value=empty_suggestion_df(),
610
  interactive=False
611
  )
612
-
613
  dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
614
 
615
- # wiring
616
- search_btn.click(fn=search_cb, inputs=q, outputs=[search_note, matches])
617
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
618
- table.change(fn=lock_table, inputs=table, outputs=table)
619
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
620
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  # main compute
622
  run_btn.click(
623
  fn=compute,
624
  inputs=[lookback, table, risk_band, use_emb, pick_idx],
625
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl]
626
  )
627
 
 
 
 
 
628
  if __name__ == "__main__":
629
  demo.launch()
 
1
+ # app.py
2
+ import os, io, math, time, warnings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  warnings.filterwarnings("ignore")
4
 
5
+ from typing import List, Tuple, Dict, Optional
6
+
7
  import numpy as np
8
  import pandas as pd
9
  import matplotlib.pyplot as plt
10
  from PIL import Image
 
 
11
  import requests
12
  import yfinance as yf
13
+ import gradio as gr
14
 
15
+ # ---------------- config ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  DATA_DIR = "data"
17
  os.makedirs(DATA_DIR, exist_ok=True)
18
 
 
19
  MAX_TICKERS = 30
20
  DEFAULT_LOOKBACK_YEARS = 10
21
+ MARKET_TICKER = "VOO"
22
+
23
+ SYNTH_ROWS = 1000 # size of generated dataset for suggestions
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ # Globals that update with horizon changes
26
+ HORIZON_YEARS = 10
27
+ RF_CODE = "DGS10"
28
+ RF_ANN = 0.0375 # updated at launch
29
+
30
+ # ---------------- helpers ----------------
31
def fred_series_for_horizon(years: float) -> str:
    """Pick the FRED series code (DGS2 ... DGS30) for a horizon in years.

    The horizon is clamped to the range [1, 100] and then matched against a
    coarse ladder of upper bounds; anything above 20 years maps to "DGS30".
    """
    horizon = max(1.0, min(100.0, float(years)))
    # (upper bound in years, series code), scanned from the shortest tenor up
    ladder = (
        (2, "DGS2"),
        (3, "DGS3"),
        (5, "DGS5"),
        (7, "DGS7"),
        (10, "DGS10"),
        (20, "DGS20"),
    )
    for limit, code in ladder:
        if horizon <= limit:
            return code
    return "DGS30"
41
 
42
  def fetch_fred_yield_annual(code: str) -> float:
 
51
  return 0.03
52
 
53
def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
    """Download monthly auto-adjusted prices, one column per ticker.

    Args:
        tickers: Symbols to download. Entries are upper-cased, stripped and
            de-duplicated (order preserved); empty/None entries are ignored.
        years: Length of the lookback window in years (plus a 7-day margin).

    Returns:
        A frame indexed by date whose columns are the requested tickers that
        actually came back from the download, with all-NaN rows dropped and
        remaining gaps forward-filled. Empty when no usable ticker is given
        or the download returns nothing.
    """
    symbols = [
        s
        for s in dict.fromkeys(str(t).upper().strip() for t in tickers if t)
        if s
    ]
    if not symbols:
        # Nothing to ask for; avoid a pointless (and error-prone) download call.
        return pd.DataFrame()

    end = pd.Timestamp.today(tz="UTC")
    start = end - pd.DateOffset(years=years, days=7)

    raw = yf.download(
        symbols,
        start=start.date(),
        end=end.date(),
        interval="1mo",
        auto_adjust=True,
        actions=False,
        progress=False,
        group_by="column",
        threads=False,
    )
    if raw is None or len(raw) == 0:
        return pd.DataFrame()
    if isinstance(raw, pd.Series):
        raw = raw.to_frame()

    if isinstance(raw.columns, pd.MultiIndex):
        # Level 0 holds the price field, level 1 the ticker; prefer Close.
        fields = [str(x) for x in raw.columns.get_level_values(0).unique()]
        if "Close" in fields:
            prices = raw["Close"]
        elif "Adj Close" in fields:
            prices = raw["Adj Close"]
        else:
            # Unknown layout: fall back to the last level-0 field.
            prices = raw.xs(raw.columns.levels[0][-1], axis=1, level=0, drop_level=True)
    elif len(symbols) == 1 and symbols[0] not in raw.columns:
        # A single-ticker download can come back as flat price-field columns
        # (Close, Open, ...) in some yfinance versions; relabel the price
        # column with the ticker so it is not silently dropped below.
        field = next((f for f in ("Close", "Adj Close") if f in raw.columns), None)
        prices = raw[[field]].rename(columns={field: symbols[0]}) if field else raw
    else:
        # Already one plain column per ticker.
        prices = raw

    if isinstance(prices, pd.Series):
        prices = prices.to_frame(name=symbols[0])

    # Keep only tickers we asked for, drop all-NaN rows, forward-fill gaps.
    keep = [s for s in symbols if s in prices.columns]
    return prices[keep].dropna(how="all").ffill()
91
 
92
def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Turn a price frame into period-over-period percentage changes.

    Rows that contain any missing value after differencing (including the
    first row) are removed.
    """
    changes = prices.pct_change()
    return changes.dropna()
94
 
95
  def yahoo_search(query: str):
96
  if not query or not str(query).strip():
 
116
  return [f"{query.strip().upper()} | typed symbol | n/a"]
117
 
118
def validate_tickers(symbols: List[str], years: int) -> List[str]:
    """Return the subset of *symbols* for which monthly prices were fetched.

    Symbols are upper-cased, stripped and de-duplicated (first occurrence
    wins). Prices are requested together with the market proxy; if the market
    proxy itself is missing from the result, an empty list is returned because
    CAPM moments cannot be computed without it.
    """
    wanted: List[str] = []
    for raw in symbols:
        sym = raw.upper().strip()
        if sym and sym not in wanted:
            wanted.append(sym)

    prices = fetch_prices_monthly(wanted + [MARKET_TICKER], years)
    available = prices.columns
    if MARKET_TICKER not in available:
        return []
    return [sym for sym in wanted if sym in available]
126
 
127
+ # -------------- aligned moments --------------
 
128
def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
    """Return monthly returns for *symbols* plus the market, on common dates.

    Args:
        symbols: Asset tickers; the market proxy may be included or omitted.
        years: Lookback window passed through to the price download.

    Returns:
        A frame with asset columns first (input order, de-duplicated) and the
        market proxy last when available, restricted to rows where every
        selected column has a return.
    """
    # fetch_prices_monthly upper-cases and strips its column labels, so the
    # symbols must be normalized the same way or they would never match and
    # would be silently dropped (and "voo" would not be seen as the market).
    cleaned = (str(s).upper().strip() for s in symbols if s)
    assets = [s for s in dict.fromkeys(cleaned) if s and s != MARKET_TICKER]

    prices = fetch_prices_monthly(assets + [MARKET_TICKER], years)
    rets = monthly_returns(prices)

    cols = [s for s in assets if s in rets.columns]
    if MARKET_TICKER in rets.columns:
        cols.append(MARKET_TICKER)

    aligned = rets[cols].dropna(how="any")
    return aligned.loc[:, ~aligned.columns.duplicated()]
136
 
137
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
138
  R = get_aligned_monthly_returns(symbols, years)
139
+ if MARKET_TICKER not in R.columns or len(R) < 3:
140
+ raise ValueError("Not enough aligned data with market proxy.")
 
141
  rf_m = rf_ann / 12.0
142
 
143
  m = R[MARKET_TICKER]
 
150
 
151
  ex_m = m - rf_m
152
  var_m = float(np.var(ex_m.values, ddof=1))
153
+ var_m = max(var_m, 1e-9)
154
 
155
  betas: Dict[str, float] = {}
156
  for s in [c for c in R.columns if c != MARKET_TICKER]:
157
  ex_s = R[s] - rf_m
158
  cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
159
  betas[s] = cov_sm / var_m
160
+
161
  betas[MARKET_TICKER] = 1.0
162
 
163
+ asset_cols = [c for c in R.columns if c != MARKET_TICKER]
164
+ cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
165
+ covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
 
166
 
167
  return {"betas": betas, "cov_ann": covA, "erp_ann": erp_ann, "sigma_m_ann": sigma_m_ann}
168
 
 
183
  beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
184
  mu_capm = capm_er(beta_p, rf_ann, erp_ann)
185
  cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
186
+ sigma_hist = float(max(w_expo.T @ cov @ w_expo, 0.0)) ** 0.5
187
  return beta_p, mu_capm, sigma_hist
188
 
189
def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
    """Find the market/bills mix on the CML that matches a target sigma.

    Returns a tuple ``(market_weight, bills_weight, expected_return)``. When
    the market sigma is effectively zero, everything goes to bills and the
    expected return is the risk-free rate.
    """
    if sigma_mkt > 1e-12:
        market_weight = sigma_target / sigma_mkt
        expected = rf_ann + market_weight * erp_ann
        return market_weight, 1.0 - market_weight, expected
    return 0.0, 1.0, rf_ann
 
 
 
 
 
 
 
195
 
196
def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
    """Find the market/bills mix on the CML that matches a target return.

    Returns a tuple ``(market_weight, bills_weight, sigma)``. When the equity
    risk premium is effectively zero the mix is all bills, and the third
    element is the risk-free rate (as in the zero-premium branch below).
    """
    if abs(erp_ann) > 1e-12:
        market_weight = (mu_target - rf_ann) / erp_ann
        sigma = abs(market_weight) * sigma_mkt
        return market_weight, 1.0 - market_weight, sigma
    return 0.0, 1.0, rf_ann
201
 
202
+ # -------------- plotting (CAPM on CML) --------------
203
  def _pct(x):
204
+ return np.asarray(x, dtype=float) * 100.0
205
+
206
+ def plot_cml(rf_ann, erp_ann, sigma_mkt, beta_p, mu_capm, sigma_capm, sugg_mu=None, sugg_sigma=None) -> Image.Image:
207
+ fig = plt.figure(figsize=(6, 4), dpi=120)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
+ xmax = max(0.3, sigma_mkt * 2.2, (sigma_capm or 0.0) * 1.6, (sugg_sigma or 0.0) * 1.6)
210
+ xs = np.linspace(0, xmax, 200)
211
+ cml = rf_ann + (erp_ann / max(sigma_mkt, 1e-9)) * xs
212
 
213
+ plt.plot(_pct(xs), _pct(cml), label="CML via Market", linewidth=1.8)
214
+ # key points on CML (CAPM view)
215
+ plt.scatter([_pct(0)], [_pct(rf_ann)], label="Risk-free")
216
+ plt.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market")
217
+ plt.scatter([_pct(sigma_capm)], [_pct(mu_capm)], label="Your CAPM point", marker="o")
218
 
219
+ if sugg_mu is not None and sugg_sigma is not None:
220
+ plt.scatter([_pct(sugg_sigma)], [_pct(sugg_mu)], label="Selected Suggestion", marker="X", s=60)
 
221
 
222
  plt.xlabel("σ (annualized, %)")
223
  plt.ylabel("Expected return (annual, %)")
 
230
  buf.seek(0)
231
  return Image.open(buf)
232
 
233
+ # -------------- synthetic dataset (from current universe) --------------
 
234
  def build_synthetic_dataset(universe: List[str],
235
+ covA: pd.DataFrame,
236
  betas: Dict[str, float],
237
+ rf_ann: float,
238
+ erp_ann: float,
239
+ sigma_mkt: float,
240
  n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
241
  rng = np.random.default_rng(12345)
242
+ assets = [t for t in universe if t != MARKET_TICKER]
243
+ if not assets:
244
+ assets = [MARKET_TICKER]
245
+
246
  rows = []
 
247
  for i in range(n_rows):
248
+ k = int(rng.integers(low=2, high=min(8, len(universe)) + 1))
249
+ picks = list(rng.choice(universe, size=k, replace=False))
250
+ # long-only exposures sum to 1 (cleaner for presentation)
251
+ w = rng.dirichlet(np.ones(k))
252
+ beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
253
+ mu_capm = capm_er(beta_p, rf_ann, erp_ann)
254
+ # historical sigma of that physical mix (not used on CML)
255
+ sub = covA.reindex(index=picks, columns=picks).fillna(0.0).to_numpy()
256
+ sigma_hist = float(max(w.T @ sub @ w, 0.0)) ** 0.5
257
+ # CAPM sigma on CML for same expected return
258
+ sigma_capm = abs(beta_p) * sigma_mkt
259
+
260
  rows.append({
261
  "tickers": ",".join(picks),
262
  "weights": ",".join(f"{x:.6f}" for x in w),
 
267
  })
268
  return pd.DataFrame(rows)
269
 
270
+ def _band_bounds(sigma_mkt: float, band: str) -> Tuple[float, float]:
271
+ band = (band or "Medium").strip().lower()
272
+ if band.startswith("low"):
273
+ return 0.0, 0.8 * sigma_mkt
274
+ if band.startswith("high"):
275
+ return 1.2 * sigma_mkt, 3.0 * sigma_mkt
276
+ # medium
277
+ return 0.8 * sigma_mkt, 1.2 * sigma_mkt
278
+
279
def top3_by_return_in_band(df: pd.DataFrame, band: str, sigma_mkt: float) -> pd.DataFrame:
    """Pick the three highest-``mu_capm`` rows whose sigma lies in the band.

    Rows are kept when ``sigma_capm`` falls inside the inclusive interval
    given by ``_band_bounds``; if no row qualifies, the whole frame is used.
    The result gets a leading ``pick`` column numbered from 1.
    """
    lower, upper = _band_bounds(sigma_mkt, band)
    sigma = df["sigma_capm"]
    candidates = df[(sigma >= lower) & (sigma <= upper)].copy()
    if candidates.empty:
        # Nothing in the band: fall back to ranking the full dataset.
        candidates = df.copy()

    ranked = candidates.sort_values("mu_capm", ascending=False)
    best = ranked.head(3).reset_index(drop=True)
    best.insert(0, "pick", list(range(1, len(best) + 1)))
    return best
287
+
288
+ # -------------- optional: embeddings rerank --------------
289
_EMBED_MODEL = None  # SentenceTransformer instance, created on first use


def _get_embed_model():
    """Return the finance embedding model, loading it only on the first call."""
    global _EMBED_MODEL
    if _EMBED_MODEL is None:
        from sentence_transformers import SentenceTransformer
        _EMBED_MODEL = SentenceTransformer("FinLang/finance-embeddings-investopedia")
    return _EMBED_MODEL


def rerank_with_embeddings(top3: pd.DataFrame, band: str) -> pd.DataFrame:
    """Reorder candidate portfolios by similarity to a risk-band prompt.

    Each row is described in a short sentence (tickers, beta, expected return,
    sigma), embedded, and compared by cosine similarity with a prompt for the
    requested band; rows are returned most-similar first with a fresh index.

    Args:
        top3: Candidates with ``tickers``, ``beta``, ``mu_capm`` and
            ``sigma_capm`` columns.
        band: Risk band label; matched case-insensitively by prefix
            ("low", "medium", "high"), anything else counts as medium.

    Returns:
        The reordered frame, or *top3* unchanged when it is empty or when the
        embedding step fails for any reason (this step is optional).
    """
    if top3 is None or top3.empty:
        # Nothing to rank; skip loading the model entirely.
        return top3

    prompts = {
        "low": "low risk conservative portfolio stable diversified market exposure",
        "medium": "balanced medium risk diversified portfolio",
        "high": "high risk growth aggressive portfolio higher expected return",
    }
    label = (band or "medium").strip().lower()
    key = next((k for k in prompts if label.startswith(k)), "medium")

    try:
        model = _get_embed_model()

        cand_texts = [
            f"portfolio with tickers {r['tickers']} having beta {float(r['beta']):.2f}, "
            f"expected return {float(r['mu_capm']):.3f}, sigma {float(r['sigma_capm']):.3f}"
            for _, r in top3.iterrows()
        ]

        q = model.encode([prompts[key]])
        c = model.encode(cand_texts)
        # cosine similarity between the single prompt vector and each candidate
        sims = (q @ c.T) / (np.linalg.norm(q) * np.linalg.norm(c, axis=1, keepdims=False))
        order = np.argsort(-sims.ravel())
        return top3.iloc[order].reset_index(drop=True)
    except Exception:
        # Deliberate best effort: a missing package, failed model download or
        # malformed row must not break the suggestion flow.
        return top3
314
 
315
+ # -------------- UI helpers --------------
316
def empty_positions_df():
    """Return an empty frame with the columns of the computed-positions table."""
    position_columns = ["ticker", "amount_usd", "weight_exposure", "beta"]
    return pd.DataFrame(columns=position_columns)
318
 
319
def empty_suggestion_df():
    """Return an empty frame with the columns of the suggestion-holdings table."""
    suggestion_columns = ["ticker", "weight_%", "amount_$"]
    return pd.DataFrame(columns=suggestion_columns)
321
 
322
def set_horizon(years: float):
    """Update the global horizon and the risk-free rate that goes with it.

    ``years`` is clamped to the range 1..100. The matching FRED series code
    is obtained from ``fred_series_for_horizon`` and its latest annual rate
    from ``fetch_fred_yield_annual``; both, together with the clamped
    horizon, are stored in the module globals ``HORIZON_YEARS``,
    ``RF_CODE`` and ``RF_ANN``.

    A missing or non-numeric value (for example a cleared number field,
    which arrives as ``None``) keeps the current horizon instead of raising.

    Returns a one-line status message naming the series and its rate.
    """
    global HORIZON_YEARS, RF_CODE, RF_ANN
    try:
        requested = float(years)
    except (TypeError, ValueError):
        # Empty/invalid input: re-use the horizon already in effect.
        requested = float(HORIZON_YEARS)
    y = max(1.0, min(100.0, requested))
    code = fred_series_for_horizon(y)
    rf = fetch_fred_yield_annual(code)
    HORIZON_YEARS = y
    RF_CODE = code
    RF_ANN = rf
    return f"Risk-free series {code}. Latest annual rate {rf:.2%}."
331
 
332
def search_tickers_cb(q: str):
    """Search for symbols matching ``q`` and refresh the matches selector.

    Returns a (note, update) pair: a hint telling the user what to do next
    (or that nothing matched), and a Gradio update that replaces the
    selector's choices with the search results and clears its value.
    """
    opts = yahoo_search(q)
    if opts:
        note = "Select a symbol and click 'Add selected to portfolio'."
    else:
        note = "No matches."
    return note, gr.update(choices=opts, value=None)
336
 
337
def add_symbol(selection: str, table: Optional[pd.DataFrame]):
    """Add the selected search match to the portfolio table.

    ``selection`` is a match string whose part before the first ``|`` is
    the ticker symbol. The symbol is appended to the tickers already in
    ``table`` (if not present), the combined list is passed through
    ``validate_tickers`` and only tickers it returns are kept. Existing
    dollar amounts are carried over; new or blank/non-numeric amounts
    become 0.0. The table is capped at ``MAX_TICKERS`` rows.

    Returns a (table, message) pair. The message distinguishes between
    no selection, the cap being hit, the symbol failing validation, and a
    successful add.
    """
    has_rows = isinstance(table, pd.DataFrame) and not table.empty

    if not selection:
        unchanged = (
            table
            if isinstance(table, pd.DataFrame)
            else pd.DataFrame(columns=["ticker", "amount_usd"])
        )
        return unchanged, "Pick a row in Matches first."
    symbol = selection.split("|")[0].strip().upper()

    current = []
    if has_rows:
        current = [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
    tickers = current if symbol in current else current + [symbol]

    val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
    tickers = [t for t in tickers if t in val]

    amt_map = {}
    if has_rows:
        for _, r in table.iterrows():
            t = str(r.get("ticker", "")).upper()
            if t in tickers:
                amount = pd.to_numeric(r.get("amount_usd", 0.0), errors="coerce")
                # NaN is truthy, so `amount or 0.0` would keep NaN; test explicitly.
                amt_map[t] = 0.0 if pd.isna(amount) else float(amount)

    new_table = pd.DataFrame({"ticker": tickers, "amount_usd": [amt_map.get(t, 0.0) for t in tickers]})
    if len(new_table) > MAX_TICKERS:
        new_table = new_table.iloc[:MAX_TICKERS]
        return new_table, f"Reached max of {MAX_TICKERS}."
    if symbol not in tickers:
        # Validation dropped the symbol; do not claim it was added.
        return new_table, f"{symbol} could not be validated and was not added."
    return new_table, f"Added {symbol}."
362
 
363
+ def lock_ticker_column(tb: Optional[pd.DataFrame]):
364
+ if not isinstance(tb, pd.DataFrame) or tb.empty:
365
  return pd.DataFrame(columns=["ticker", "amount_usd"])
366
  tickers = [str(x).upper() for x in tb["ticker"].tolist()]
367
  amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
 
370
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
371
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
372
 
373
+ # -------------- main compute --------------
374
+ UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
 
 
 
 
 
 
 
 
 
375
 
376
  def compute(
377
  years_lookback: int,
378
+ table: Optional[pd.DataFrame],
379
  risk_band: str,
380
  use_embeddings: bool,
381
  pick_idx: int
382
  ):
383
+ # sanitize table
384
+ if isinstance(table, pd.DataFrame):
385
+ df = table.copy()
386
+ else:
387
+ df = pd.DataFrame(columns=["ticker", "amount_usd"])
388
+ df = df.dropna(how="all")
389
+ if "ticker" not in df.columns: df["ticker"] = []
390
+ if "amount_usd" not in df.columns: df["amount_usd"] = []
391
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
392
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
393
 
 
406
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
407
  rf_ann = RF_ANN
408
 
409
+ # Moments
410
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
411
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
412
 
413
+ # Weights
414
  gross = sum(abs(v) for v in amounts.values())
415
  if gross <= 1e-12:
416
  return None, "All amounts are zero.", "Universe ok.", empty_positions_df(), empty_suggestion_df(), None
417
  weights = {k: v / gross for k, v in amounts.items()}
418
 
419
+ # Portfolio CAPM stats
420
  beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
421
  sigma_capm = abs(beta_p) * sigma_mkt
422
 
423
+ # Efficient alternatives (using historical σ and CAPM μ for reference)
424
+ a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
425
+ a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
426
+
427
+ # Synthetic dataset & suggestions
428
+ synth = build_synthetic_dataset(UNIVERSE, covA, betas, rf_ann, erp_ann, sigma_mkt, n_rows=SYNTH_ROWS)
429
  csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
 
430
  synth.to_csv(csv_path, index=False)
431
 
432
+ top3 = top3_by_return_in_band(synth, risk_band, sigma_mkt)
433
  if use_embeddings:
434
  top3 = rerank_with_embeddings(top3, risk_band)
 
 
435
  if top3.empty:
436
  top3 = synth.sort_values("mu_capm", ascending=False).head(3).reset_index(drop=True)
437
+ top3.insert(0, "pick", [1, 2, 3][: len(top3)])
438
 
 
439
  idx = max(1, min(3, int(pick_idx))) - 1
440
  row = top3.iloc[idx]
441
 
 
442
  sugg_mu = float(row["mu_capm"])
443
+ sugg_sigma = float(row["sigma_capm"])
444
 
445
+ # suggestion holdings (% and $)
446
  ts = [t.strip() for t in str(row["tickers"]).split(",")]
447
  ws = [float(x) for x in str(row["weights"]).split(",")]
448
+ s = sum(ws) if ws else 1.0
449
+ ws = [max(0.0, w) / s for w in ws]
450
  budget = gross if gross > 0 else 1.0
451
+ sugg_table = pd.DataFrame(
452
+ [{"ticker": t, "weight_%": round(w*100.0, 2), "amount_$": round(w*budget, 0)} for t, w in zip(ts, ws)],
453
+ columns=["ticker", "weight_%", "amount_$"]
454
+ )
 
 
 
 
455
 
456
+ # positions table
457
+ pos_table = pd.DataFrame(
458
+ [{
 
459
  "ticker": t,
460
  "amount_usd": amounts.get(t, 0.0),
461
  "weight_exposure": weights.get(t, 0.0),
462
  "beta": 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
463
+ } for t in symbols],
464
+ columns=["ticker", "amount_usd", "weight_exposure", "beta"]
 
 
 
 
 
 
465
  )
466
 
467
+ # plot
468
+ img = plot_cml(rf_ann, erp_ann, sigma_mkt, beta_p, mu_capm, sigma_capm, sugg_mu, sugg_sigma)
469
+
470
+ info = "\n".join([
471
+ "### Inputs",
472
+ f"- Lookback years {years_lookback}",
473
+ f"- Horizon years {int(round(HORIZON_YEARS))}",
474
+ f"- Risk-free {rf_ann:.2%} from {RF_CODE}",
475
+ f"- Market ERP {erp_ann:.2%}",
476
+ f"- Market σ {sigma_mkt:.2%}",
477
+ "",
478
+ "### Your portfolio (CAPM)",
479
+ f"- Beta {beta_p:.2f}",
480
+ f"- Expected return (CAPM / SML) {mu_capm:.2%}",
481
+ f"- on CML for your beta (|β|×σ_mkt) {sigma_capm:.2%}",
482
+ "",
483
+ "### Efficient alternatives on CML",
484
+ f"- Same σ as your portfolio (historical): Market weight {a_sigma:.2f}, Bills weight {b_sigma:.2f}, return {mu_eff_sigma:.2%}",
485
+ f"- Same return (CAPM): Market weight {a_mu:.2f}, Bills weight {b_mu:.2f}, σ {sigma_eff_mu:.2%}",
486
+ "",
487
+ "### Dataset-based suggestions (risk: " + risk_band + ")",
488
+ f"- Use the carousel to flip between **Pick #1 / #2 / #3**.",
489
+ f"- Showing Pick **#{idx+1}** → CAPM return {sugg_mu:.2%}, CAPM σ {sugg_sigma:.2%}",
490
+ "",
491
+ "_Plot shows CAPM expectations on the CML (not historical means)._"
492
+ ])
493
 
494
  uni_msg = f"Universe set to: {', '.join(UNIVERSE)}"
495
+ return img, info, uni_msg, pos_table, sugg_table, csv_path, gr.update(label=f"Pick #{idx+1} of 3")
496
 
497
+ # -------------- UI --------------
498
def inc_pick(i: int):
    """Advance the carousel index by one, clamped to 1..3 (empty/0 counts as 1)."""
    nxt = int(i or 1) + 1
    if nxt < 1:
        return 1
    return 3 if nxt > 3 else nxt
499
def dec_pick(i: int):
    """Step the carousel index back by one, clamped to 1..3 (empty/0 counts as 1)."""
    prev = int(i or 1) - 1
    if prev > 3:
        return 3
    return prev if prev >= 1 else 1
500
 
 
501
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
502
  gr.Markdown(
503
  "## Efficient Portfolio Advisor\n"
504
  "Search symbols, enter **dollar amounts**, set horizon. Returns use Yahoo Finance monthly data; risk-free from FRED. "
505
+ "Plot shows **CAPM point on the CML** plus efficient CML points."
506
  )
507
 
508
  with gr.Row():
 
521
  col_count=(2, "fixed")
522
  )
523
 
524
+ horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
525
+ lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for betas & covariances")
526
 
527
  gr.Markdown("### Suggestions")
528
+ risk_band = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk tolerance")
529
+ use_emb = gr.Checkbox(value=True, label="Use finance embeddings to refine picks")
 
530
 
531
+ with gr.Row():
532
+ prev_btn = gr.Button("◀ Prev")
533
+ pick_idx = gr.Number(value=1, precision=0, label="Carousel")
534
+ next_btn = gr.Button("Next ▶")
535
 
536
+ run_btn = gr.Button("Compute (build dataset & suggest)")
537
  with gr.Column(scale=1):
538
  plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
539
  summary = gr.Markdown(label="Inputs & Results")
540
  universe_msg = gr.Textbox(label="Universe status", interactive=False)
 
541
  positions = gr.Dataframe(
542
  label="Computed positions",
543
  headers=["ticker", "amount_usd", "weight_exposure", "beta"],
 
546
  value=empty_positions_df(),
547
  interactive=False
548
  )
 
549
  sugg_table = gr.Dataframe(
550
  label="Selected suggestion (carousel) — holdings shown in % and $",
551
  headers=["ticker", "weight_%", "amount_$"],
 
554
  value=empty_suggestion_df(),
555
  interactive=False
556
  )
 
557
  dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
558
 
559
+ # wire search / add / locking / horizon
560
+ search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
561
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
562
+ table.change(fn=lock_ticker_column, inputs=table, outputs=table)
563
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
564
 
565
+ # carousel buttons update pick index and then recompute
566
+ prev_btn.click(fn=dec_pick, inputs=pick_idx, outputs=pick_idx).then(
567
+ fn=compute,
568
+ inputs=[lookback, table, risk_band, use_emb, pick_idx],
569
+ outputs=[plot, summary, universe_msg, positions, sugg_table, dl, pick_idx]
570
+ )
571
+ next_btn.click(fn=inc_pick, inputs=pick_idx, outputs=pick_idx).then(
572
+ fn=compute,
573
+ inputs=[lookback, table, risk_band, use_emb, pick_idx],
574
+ outputs=[plot, summary, universe_msg, positions, sugg_table, dl, pick_idx]
575
+ )
576
+
577
  # main compute
578
  run_btn.click(
579
  fn=compute,
580
  inputs=[lookback, table, risk_band, use_emb, pick_idx],
581
+ outputs=[plot, summary, universe_msg, positions, sugg_table, dl, pick_idx]
582
  )
583
 
584
+ # initialize risk-free at launch
585
+ RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
586
+ RF_ANN = fetch_fred_yield_annual(RF_CODE)
587
+
588
  if __name__ == "__main__":
589
  demo.launch()