Tulitula commited on
Commit
8295760
·
verified ·
1 Parent(s): addb902

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +587 -634
app.py CHANGED
@@ -1,77 +1,58 @@
1
  # app.py
2
- import os, io, math, json, warnings
 
 
 
 
 
 
 
 
 
3
  warnings.filterwarnings("ignore")
4
 
5
- # --- make common caches writable even on locked-down containers ---
6
- APP_ROOT = os.path.abspath(os.path.dirname(__file__))
7
- DATA_DIR = os.path.join(APP_ROOT, "data")
8
- os.makedirs(DATA_DIR, exist_ok=True)
9
-
10
- # Matplotlib cache
11
- os.environ.setdefault("MPLCONFIGDIR", os.path.join(DATA_DIR, ".mplconfig"))
12
- os.makedirs(os.environ["MPLCONFIGDIR"], exist_ok=True)
13
-
14
- # Hugging Face / Sentence Transformers caches
15
- os.environ.setdefault("HF_HOME", os.path.join(DATA_DIR, ".huggingface"))
16
- os.environ.setdefault("HUGGINGFACE_HUB_CACHE", os.path.join(DATA_DIR, ".huggingface", "hub"))
17
- os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", os.path.join(DATA_DIR, ".sentencetransformers"))
18
- for d in [os.environ["HF_HOME"], os.environ["HUGGINGFACE_HUB_CACHE"], os.environ["SENTENCE_TRANSFORMERS_HOME"]]:
19
- os.makedirs(d, exist_ok=True)
20
-
21
  from typing import List, Tuple, Dict, Optional
22
 
23
  import numpy as np
24
  import pandas as pd
25
  import matplotlib.pyplot as plt
26
  from PIL import Image
27
- import gradio as gr
28
  import requests
29
  import yfinance as yf
 
30
 
31
- from sentence_transformers import SentenceTransformer, util as st_util
32
- from sklearn.preprocessing import StandardScaler
33
- from sklearn.neighbors import KNeighborsRegressor
34
 
35
- # =========================
36
- # Config
37
- # =========================
38
- DEFAULT_LOOKBACK_YEARS = 5
39
  MAX_TICKERS = 30
40
- MARKET_TICKER = "VOO" # proxy for market portfolio
41
- BILLS_TICKER = "BILLS" # synthetic cash / T-Bills bucket
42
-
43
- EMBED_MODEL_NAME = "BAAI/bge-base-en-v1.5" # fully local, no API keys
44
-
45
- POS_COLS = ["ticker", "amount_usd", "weight_exposure", "beta"]
46
- SUG_COLS = ["ticker", "weight_%", "amount_$"]
47
- EFF_COLS = ["asset", "weight_%", "amount_$"]
48
-
49
- N_SYNTH = 1000 # size of synthetic dataset per run
50
- MMR_K = 40 # shortlist size before MMR
51
- MMR_LAMBDA = 0.65 # similarity vs diversity tradeoff
52
-
53
- # ---------------- FRED mapping (risk-free source) ----------------
54
- FRED_MAP = [
55
- (1, "DGS1"),
56
- (2, "DGS2"),
57
- (3, "DGS3"),
58
- (5, "DGS5"),
59
- (7, "DGS7"),
60
- (10, "DGS10"),
61
- (20, "DGS20"),
62
- (30, "DGS30"),
63
- (100, "DGS30"),
64
- ]
65
-
66
  def fred_series_for_horizon(years: float) -> str:
67
  y = max(1.0, min(100.0, float(years)))
68
- for cutoff, code in FRED_MAP:
69
- if y <= cutoff:
70
- return code
 
 
 
71
  return "DGS30"
72
 
73
  def fetch_fred_yield_annual(code: str) -> float:
74
- # FRED CSV endpoint (no API key required). Fallback to 3% if it fails.
75
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
76
  try:
77
  r = requests.get(url, timeout=10)
@@ -82,96 +63,95 @@ def fetch_fred_yield_annual(code: str) -> float:
82
  except Exception:
83
  return 0.03
84
 
85
- # =========================
86
- # Data helpers
87
- # =========================
88
- def _to_cols_close(df: pd.DataFrame) -> pd.DataFrame:
89
- """Coerce yfinance download to a single-level columns DataFrame of adjusted closes."""
90
- if df is None or df.empty:
91
- return pd.DataFrame()
92
- if isinstance(df, pd.Series):
93
- df = df.to_frame("Close")
94
- if isinstance(df.columns, pd.MultiIndex):
95
- level0 = df.columns.get_level_values(0).unique().tolist()
96
- fields = df.columns.get_level_values(1).unique().tolist()
97
- field = "Adj Close" if "Adj Close" in fields else ("Close" if "Close" in fields else fields[0])
98
- out = {}
99
- for t in level0:
100
- col = (t, field)
101
- if col in df.columns:
102
- out[t] = df[col]
103
- out_df = pd.DataFrame(out)
104
- return out_df
105
- else:
106
- if "Adj Close" in df.columns:
107
- return df[["Adj Close"]].rename(columns={"Adj Close": "SINGLE"})
108
- if "Close" in df.columns:
109
- return df[["Close"]].rename(columns={"Close": "SINGLE"})
110
- num_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
111
- if num_cols:
112
- return df[[num_cols[0]]].rename(columns={num_cols[0]: "SINGLE"})
113
- return pd.DataFrame()
114
-
115
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
116
- start = (pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=int(years), days=7)).date()
117
- end = pd.Timestamp.today(tz="UTC").date()
118
- df_raw = yf.download(
119
- list(dict.fromkeys(tickers)),
 
 
120
  start=start, end=end,
121
- interval="1mo", auto_adjust=True, progress=False, group_by="ticker",
122
- threads=True,
 
 
 
 
123
  )
124
- df = _to_cols_close(df_raw).copy()
125
- if df.empty:
126
- return df
127
- if df.shape[1] == 1 and "SINGLE" in df.columns:
128
- df.columns = [tickers[0]]
129
- df = df.dropna(how="all").fillna(method="ffill")
130
- return df
 
 
 
 
 
 
 
 
 
131
 
132
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
133
  return prices.pct_change().dropna()
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
136
- """Return subset of symbols that have enough data over lookback."""
137
- symbols = [s.strip().upper() for s in symbols if s and isinstance(s, str)]
138
- base = [s for s in symbols if s != MARKET_TICKER]
139
  px = fetch_prices_monthly(base + [MARKET_TICKER], years)
140
- ok = []
141
- for s in symbols:
142
- if s in px.columns:
143
- ok.append(s)
144
  return ok
145
 
146
- # =========================
147
- # Moments & CAPM
148
- # =========================
149
- def annualize_mean(m): return np.asarray(m, dtype=float) * 12.0
150
- def annualize_sigma(s): return np.asarray(s, dtype=float) * math.sqrt(12.0)
151
-
152
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
153
- uniq = [c for c in dict.fromkeys(symbols)]
154
- if MARKET_TICKER not in uniq:
155
- uniq.append(MARKET_TICKER)
156
- px = fetch_prices_monthly(uniq, years)
157
  rets = monthly_returns(px)
158
- cols = [c for c in uniq if c in rets.columns]
159
  R = rets[cols].dropna(how="any")
160
  return R.loc[:, ~R.columns.duplicated()]
161
 
162
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
163
- R = get_aligned_monthly_returns(symbols + [MARKET_TICKER], years)
164
- if MARKET_TICKER not in R.columns or R.shape[0] < 3:
165
- raise ValueError("Not enough aligned data to estimate moments.")
166
  rf_m = rf_ann / 12.0
167
 
168
  m = R[MARKET_TICKER]
169
  if isinstance(m, pd.DataFrame):
170
  m = m.iloc[:, 0].squeeze()
171
 
172
- mu_m_ann = float(annualize_mean(m.mean()))
173
- sigma_m_ann = float(annualize_sigma(m.std(ddof=1)))
174
- erp_ann = float(mu_m_ann - rf_ann)
175
 
176
  ex_m = m - rf_m
177
  var_m = float(np.var(ex_m.values, ddof=1))
@@ -183,7 +163,7 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
183
  cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
184
  betas[s] = cov_sm / var_m
185
 
186
- betas[MARKET_TICKER] = 1.0 # by definition
187
 
188
  asset_cols = [c for c in R.columns if c != MARKET_TICKER]
189
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
@@ -206,77 +186,47 @@ def portfolio_stats(weights: Dict[str, float],
206
  return 0.0, rf_ann, 0.0
207
  w_expo = w / gross
208
  beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
209
- er_capm = capm_er(beta_p, rf_ann, erp_ann)
210
  cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
211
- sigma_p = math.sqrt(max(float(w_expo.T @ cov @ w_expo), 0.0))
212
- return beta_p, er_capm, sigma_p
213
 
214
- # =========================
215
- # Efficient (CML) alternatives
216
- # =========================
217
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
218
- """Weights (a on Market, b on Bills) and expected return on CML with same sigma."""
219
  if sigma_mkt <= 1e-12:
220
  return 0.0, 1.0, rf_ann
221
  a = sigma_target / sigma_mkt
222
  return a, 1.0 - a, rf_ann + a * erp_ann
223
 
224
  def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
225
- """Weights (a on Market, b on Bills) and sigma on CML with same expected return."""
226
  if abs(erp_ann) <= 1e-12:
227
- return 0.0, 1.0, 0.0
228
  a = (mu_target - rf_ann) / erp_ann
229
  return a, 1.0 - a, abs(a) * sigma_mkt
230
 
231
- # =========================
232
- # Plot
233
- # =========================
234
- def _pct_arr(x):
235
- x = np.asarray(x, dtype=float)
236
- return x * 100.0
237
-
238
- def plot_cml(
239
- rf_ann, erp_ann, sigma_mkt,
240
- pt_sigma_hist, pt_mu_capm,
241
- same_sigma_sigma, same_sigma_mu,
242
- same_mu_sigma, same_mu_mu,
243
- ) -> Image.Image:
244
- fig = plt.figure(figsize=(6.6, 4.4), dpi=130)
245
-
246
- xmax = max(
247
- 0.3,
248
- sigma_mkt * 2.0,
249
- pt_sigma_hist * 1.4,
250
- same_mu_sigma * 1.4,
251
- same_sigma_sigma * 1.4,
252
- )
253
 
254
- xs = np.linspace(0, xmax, 160)
255
- slope = erp_ann / max(sigma_mkt, 1e-12)
256
- cml = rf_ann + slope * xs
 
257
 
258
- plt.plot(_pct_arr(xs), _pct_arr(cml), label="CML via VOO", linewidth=1.8)
259
- plt.scatter([0.0], [_pct_arr(rf_ann)], label="Risk-free", zorder=5)
260
- plt.scatter([_pct_arr(sigma_mkt)], [_pct_arr(rf_ann + erp_ann)], label="Market (VOO)", zorder=5)
261
 
262
- # Your portfolio point uses CAPM expected return + historical sigma
263
- plt.scatter([_pct_arr(pt_sigma_hist)], [_pct_arr(pt_mu_capm)], label="Your portfolio (CAPM)", zorder=6)
 
 
264
 
265
- # Efficient matches
266
- plt.scatter([_pct_arr(same_sigma_sigma)], [_pct_arr(same_sigma_mu)], label="Efficient: same σ", zorder=5)
267
- plt.scatter([_pct_arr(same_mu_sigma)], [_pct_arr(same_mu_mu)], label="Efficient: same μ", zorder=5)
268
 
269
- # helper guides
270
- plt.plot([_pct_arr(pt_sigma_hist), _pct_arr(same_sigma_sigma)],
271
- [_pct_arr(pt_mu_capm), _pct_arr(same_sigma_mu)],
272
- ls="--", lw=1.1, alpha=0.7, color="gray")
273
- plt.plot([_pct_arr(pt_sigma_hist), _pct_arr(same_mu_sigma)],
274
- [_pct_arr(pt_mu_capm), _pct_arr(same_mu_mu)],
275
- ls="--", lw=1.1, alpha=0.7, color="gray")
276
-
277
- plt.xlabel("σ (annual, %)")
278
- plt.ylabel("E[return] (annual, %)")
279
- plt.legend(loc="best", fontsize=8)
280
  plt.tight_layout()
281
 
282
  buf = io.BytesIO()
@@ -285,180 +235,229 @@ def plot_cml(
285
  buf.seek(0)
286
  return Image.open(buf)
287
 
288
- # =========================
289
- # Synthetic dataset (for recommendations)
290
- # =========================
291
- def dirichlet_signed(k, rng):
292
- signs = rng.choice([-1.0, 1.0], size=k, p=[0.25, 0.75])
293
- raw = rng.dirichlet(np.ones(k))
294
- gross = 1.0 + float(rng.gamma(2.0, 0.5))
295
- return gross * signs * raw
296
-
297
- def build_synth_dataset(universe: List[str],
298
- cov_ann: pd.DataFrame,
299
- betas: Dict[str, float],
300
- rf_ann: float, erp_ann: float,
301
- n_rows: int = N_SYNTH,
302
- seed: int = 123) -> pd.DataFrame:
303
- rng = np.random.default_rng(seed)
304
- U = [u for u in universe if u != MARKET_TICKER] + [MARKET_TICKER]
305
  rows = []
306
  for i in range(n_rows):
307
- k = rng.integers(low=min(2, len(U)), high=min(8, len(U)) + 1)
308
  picks = list(rng.choice(U, size=k, replace=False))
309
- w = dirichlet_signed(k, rng) # exposure weights (can include short)
310
- gross = float(np.sum(np.abs(w)))
311
- if gross <= 1e-12:
312
- continue
313
- w_expo = w / gross
314
- weights = {picks[j]: float(w_expo[j]) for j in range(k)}
315
- beta_i, er_capm_i, sigma_i = portfolio_stats(weights, cov_ann, betas, rf_ann, erp_ann)
 
 
 
 
316
  rows.append({
317
- "id": int(i),
318
  "tickers": ",".join(picks),
319
- "weights": ",".join(f"{x:.6f}" for x in w_expo),
320
- "beta": float(beta_i),
321
- "er_capm": float(er_capm_i),
322
- "sigma": float(sigma_i),
 
323
  })
324
- df = pd.DataFrame(rows)
325
- return df
 
 
 
 
 
 
326
 
327
- # =========================
328
- # Embeddings + MMR selection
329
- # =========================
 
 
 
330
  _embedder = None
331
  def get_embedder():
332
  global _embedder
333
  if _embedder is None:
334
- _embedder = SentenceTransformer(EMBED_MODEL_NAME)
 
335
  return _embedder
336
 
337
- def row_to_sentence(row: pd.Series) -> str:
338
- try:
339
- ts = row["tickers"].split(",")
340
- ws = [float(x) for x in row["weights"].split(",")]
341
- pairs = ", ".join([f"{ts[i]} {ws[i]:+.2f}" for i in range(min(len(ts), len(ws)))])
342
- except Exception:
343
- pairs = ""
344
- return (f"portfolio with sigma {row['sigma']:.4f}, "
345
- f"capm_return {row['er_capm']:.4f}, "
346
- f"beta {row['beta']:.3f}, "
347
- f"exposures {pairs}")
348
-
349
- def mmr_select(query_emb: np.ndarray,
350
- cand_embs: np.ndarray,
351
- k: int = 3,
352
- lambda_param: float = MMR_LAMBDA) -> List[int]:
353
- """
354
- Maximal Marginal Relevance: pick k diverse-yet-relevant indices.
355
- """
356
- if cand_embs.shape[0] <= k:
357
- return list(range(cand_embs.shape[0]))
358
- sim_to_query = st_util.cos_sim(query_emb, cand_embs).cpu().numpy().reshape(-1)
359
- chosen = []
360
- candidate_indices = list(range(cand_embs.shape[0]))
361
- first = int(np.argmax(sim_to_query))
362
- chosen.append(first)
363
- candidate_indices.remove(first)
364
- while len(chosen) < k and candidate_indices:
365
- max_score = -1e9
366
- max_idx = candidate_indices[0]
367
- for idx in candidate_indices:
368
- sim_q = sim_to_query[idx]
369
- sim_d = max(st_util.cos_sim(cand_embs[idx], cand_embs[chosen]).cpu().numpy().reshape(-1))
370
- mmr_score = lambda_param * sim_q - (1.0 - lambda_param) * sim_d
371
- if mmr_score > max_score:
372
- max_score = mmr_score
373
- max_idx = idx
374
- chosen.append(max_idx)
375
- candidate_indices.remove(max_idx)
 
 
 
 
 
376
  return chosen
377
 
378
- # =========================
379
- # Yahoo symbol search (for UX)
380
- # =========================
381
- def yahoo_search(query: str):
382
- if not query or len(query.strip()) == 0:
383
- return []
384
- url = "https://query1.finance.yahoo.com/v1/finance/search"
385
- params = {"q": query.strip(), "quotesCount": 10, "newsCount": 0}
386
- headers = {"User-Agent": "Mozilla/5.0"}
387
- try:
388
- r = requests.get(url, params=params, headers=headers, timeout=10)
389
- r.raise_for_status()
390
- data = r.json()
391
- out = []
392
- for q in data.get("quotes", []):
393
- sym = q.get("symbol")
394
- name = q.get("shortname") or q.get("longname") or ""
395
- exch = q.get("exchDisp") or ""
396
- if sym and sym.isascii():
397
- out.append(f"{sym} | {name} | {exch}")
398
- if not out:
399
- out = [f"{query.strip().upper()} | typed symbol | n/a"]
400
- return out[:10]
401
- except Exception:
402
- return [f"{query.strip().upper()} | typed symbol | n/a"]
403
-
404
- _last_matches = [] # updated on each search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
 
406
- # =========================
407
- # Formatting helpers
408
- # =========================
409
- def fmt_pct(x: float) -> str:
410
- return f"{x*100:.2f}%"
 
 
 
 
411
 
412
- def fmt_money(x: float) -> str:
413
- return f"${x:,.0f}"
 
 
414
 
415
- # =========================
416
- # Gradio callbacks
417
- # =========================
418
- HORIZON_YEARS = 5.0
419
- RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
420
- RF_ANN = fetch_fred_yield_annual(RF_CODE)
421
-
422
- def do_search(query):
423
- global _last_matches
424
- _last_matches = yahoo_search(query)
425
- note = "Select a symbol from Matches, then click Add."
426
- return note, gr.update(choices=_last_matches, value=None)
427
-
428
- def add_symbol(selection: str, table: pd.DataFrame):
429
- if selection and " | " in selection:
430
- symbol = selection.split(" | ")[0].strip().upper()
431
- elif isinstance(selection, str) and selection.strip():
432
- symbol = selection.strip().upper()
433
- else:
434
- return table, "Pick a row from Matches first."
435
 
436
  current = []
437
- if table is not None and len(table) > 0:
438
  current = [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
439
-
440
  tickers = current if symbol in current else current + [symbol]
441
- tickers = [t for t in tickers if t]
442
 
443
  val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
444
  tickers = [t for t in tickers if t in val]
445
 
446
  amt_map = {}
447
- if table is not None and len(table) > 0:
448
  for _, r in table.iterrows():
449
  t = str(r.get("ticker", "")).upper()
450
  if t in tickers:
451
  amt_map[t] = float(pd.to_numeric(r.get("amount_usd", 0.0), errors="coerce") or 0.0)
452
 
453
  new_table = pd.DataFrame({"ticker": tickers, "amount_usd": [amt_map.get(t, 0.0) for t in tickers]})
454
- msg = f"Added {symbol}" if symbol in tickers else f"{symbol} not valid or no data"
455
  if len(new_table) > MAX_TICKERS:
456
  new_table = new_table.iloc[:MAX_TICKERS]
457
- msg = f"Reached max of {MAX_TICKERS}"
458
- return new_table, msg
459
 
460
- def lock_ticker_column(tb: pd.DataFrame):
461
- if tb is None or len(tb) == 0:
462
  return pd.DataFrame(columns=["ticker", "amount_usd"])
463
  tickers = [str(x).upper() for x in tb["ticker"].tolist()]
464
  amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
@@ -467,335 +466,289 @@ def lock_ticker_column(tb: pd.DataFrame):
467
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
468
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
469
 
470
- def set_horizon(years: float):
471
- y = max(1.0, min(100.0, float(years)))
472
- code = fred_series_for_horizon(y)
473
- rf = fetch_fred_yield_annual(code)
474
- global HORIZON_YEARS, RF_CODE, RF_ANN
475
- HORIZON_YEARS = y
476
- RF_CODE = code
477
- RF_ANN = rf
478
- return f"Risk-free series {code}. Latest annual rate {rf:.2%}. Computations will use this.", rf
479
-
480
- def _table_from_weights(weights: Dict[str, float], gross_amt: float) -> pd.DataFrame:
481
- items = []
482
- for t, w in weights.items():
483
- pct = float(w)
484
- amt = float(w) * gross_amt
485
- items.append({"ticker": t, "weight_%": round(pct * 100.0, 2), "amount_$": round(amt, 2)})
486
- df = pd.DataFrame(items, columns=SUG_COLS)
487
- df["absw"] = df["weight_%"].abs()
488
- df = df.sort_values("absw", ascending=False).drop(columns=["absw"])
489
- return df
490
-
491
- def _weights_dict_from_row(r: pd.Series) -> Dict[str, float]:
492
- ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
493
- ws = [float(x) for x in str(r["weights"]).split(",")]
494
- wmap = {}
495
- for i in range(min(len(ts), len(ws))):
496
- wmap[ts[i]] = ws[i]
497
- gross = sum(abs(v) for v in wmap.values())
 
 
 
 
 
 
 
 
 
 
 
498
  if gross <= 1e-12:
499
- return {}
500
- return {k: v / gross for k, v in wmap.items()}
 
501
 
502
- def compute(lookback_years: int,
503
- table_input,
504
- risk_bucket: str,
505
- horizon_years: float):
506
 
507
- try:
508
- # --- coerce incoming table to DataFrame (Gradio 5 may pass list-like) ---
509
- if table_input is None:
510
- df = pd.DataFrame(columns=["ticker", "amount_usd"])
511
- elif isinstance(table_input, pd.DataFrame):
512
- df = table_input.copy()
513
- else:
514
- df = pd.DataFrame(table_input, columns=["ticker", "amount_usd"])
515
-
516
- df = df.dropna(how="all")
517
- if df.empty:
518
- return (None, "Add at least one ticker", "", pd.DataFrame(columns=POS_COLS),
519
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
520
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
521
- pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions yet.")
522
-
523
- # --- sanitize
524
- df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
525
- df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
526
-
527
- symbols = [t for t in df["ticker"].tolist() if t]
528
- symbols = validate_tickers(symbols, int(lookback_years))
529
- if len(symbols) == 0:
530
- return (None, "Could not validate any tickers", "Universe invalid",
531
- pd.DataFrame(columns=POS_COLS),
532
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
533
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
534
- pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions.")
535
-
536
- # --- universe & amounts
537
- universe = sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER]))
538
- df = df[df["ticker"].isin(symbols)].copy()
539
- amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
540
- gross_amt = sum(abs(v) for v in amounts.values())
541
- if gross_amt <= 1e-9:
542
- return (None, "All amounts are zero", "Universe ok", pd.DataFrame(columns=POS_COLS),
543
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=SUG_COLS),
544
- pd.DataFrame(columns=SUG_COLS), pd.DataFrame(columns=EFF_COLS),
545
- pd.DataFrame(columns=EFF_COLS), json.dumps([]), 1, "No suggestions.")
546
-
547
- weights = {k: v / gross_amt for k, v in amounts.items()}
548
-
549
- # --- risk free & moments
550
- rf_code = fred_series_for_horizon(horizon_years)
551
- rf_ann = fetch_fred_yield_annual(rf_code)
552
- moms = estimate_all_moments_aligned(universe, int(lookback_years), rf_ann)
553
- betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
554
-
555
- # --- portfolio stats (CAPM return + historical sigma)
556
- beta_p, er_capm_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
557
-
558
- # --- efficient alternatives on CML
559
- a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
560
- a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_capm_p, rf_ann, erp_ann, sigma_mkt)
561
-
562
- eff_same_sigma_tbl = _table_from_weights({MARKET_TICKER: a_sigma, BILLS_TICKER: b_sigma}, gross_amt)
563
- eff_same_mu_tbl = _table_from_weights({MARKET_TICKER: a_mu, BILLS_TICKER: b_mu}, gross_amt)
564
-
565
- # --- build synthetic dataset (based ONLY on this universe)
566
- synth = build_synth_dataset(universe, covA, betas, rf_ann, erp_ann, n_rows=N_SYNTH, seed=777)
567
-
568
- # --- risk buckets by sigma (absolute percentage points around median)
569
- median_sigma = float(synth["sigma"].median()) if len(synth) else sigma_p
570
- low_max = max(float(synth["sigma"].min()), median_sigma - 0.05) # 5% below median
571
- high_min = median_sigma + 0.05
572
-
573
- if risk_bucket == "Low":
574
- cand_df = synth[synth["sigma"] <= low_max].copy()
575
- elif risk_bucket == "High":
576
- cand_df = synth[synth["sigma"] >= high_min].copy()
577
- else: # Medium
578
- cand_df = synth[(synth["sigma"] > low_max) & (synth["sigma"] < high_min)].copy()
579
-
580
- if len(cand_df) == 0:
581
- cand_df = synth.copy()
582
-
583
- # --- embed all candidates + query, and pick 3 via MMR for diversity
584
- embed = get_embedder()
585
- cand_sentences = cand_df.apply(row_to_sentence, axis=1).tolist()
586
-
587
- cur_pairs = ", ".join([f"{k}:{v:+.2f}" for k, v in sorted(weights.items())])
588
- q_sentence = f"user portfolio ({risk_bucket} risk); capm_target {er_capm_p:.4f}; sigma_hist {sigma_p:.4f}; exposures {cur_pairs}"
589
-
590
- cand_embs = embed.encode(cand_sentences, convert_to_tensor=True, normalize_embeddings=True, batch_size=64, show_progress_bar=False)
591
- q_emb = embed.encode([q_sentence], convert_to_tensor=True, normalize_embeddings=True)[0]
592
-
593
- sims = st_util.cos_sim(q_emb, cand_embs)[0]
594
- top_idx = sims.topk(k=min(MMR_K, len(cand_df))).indices.cpu().numpy().tolist()
595
- shortlist_embs = cand_embs[top_idx]
596
- mmr_local = mmr_select(q_emb, shortlist_embs, k=3, lambda_param=MMR_LAMBDA)
597
- chosen = [top_idx[i] for i in mmr_local]
598
- recs = cand_df.iloc[chosen].reset_index(drop=True)
599
-
600
- # --- suggestion tables for 3 picks
601
- suggs = []
602
- for _, r in recs.iterrows():
603
- wmap = _weights_dict_from_row(r)
604
- suggs.append({
605
- "weights": wmap,
606
- "er_capm": float(r["er_capm"]),
607
- "sigma": float(r["sigma"]),
608
- "beta": float(r["beta"]),
609
- "table": _table_from_weights(wmap, gross_amt)
610
- })
611
-
612
- # --- plot
613
- img = plot_cml(
614
- rf_ann, erp_ann, sigma_mkt,
615
- sigma_p, er_capm_p,
616
- same_sigma_sigma=sigma_p, same_sigma_mu=mu_eff_sigma,
617
- same_mu_sigma=sigma_eff_mu, same_mu_mu=er_capm_p
618
- )
619
-
620
- # --- positions table (computed)
621
- rows = []
622
- for t in universe:
623
- if t == MARKET_TICKER:
624
- continue
625
- rows.append({
626
- "ticker": t,
627
- "amount_usd": round(amounts.get(t, 0.0), 2),
628
- "weight_exposure": round(weights.get(t, 0.0), 6),
629
- "beta": round(betas.get(t, np.nan), 4) if t != MARKET_TICKER else 1.0
630
- })
631
- pos_table = pd.DataFrame(rows, columns=POS_COLS)
632
-
633
- # --- info summary
634
- info_lines = []
635
- info_lines.append("### Inputs")
636
- info_lines.append(f"- Lookback years **{int(lookback_years)}**")
637
- info_lines.append(f"- Horizon years **{int(round(horizon_years))}**")
638
- info_lines.append(f"- Risk-free **{fmt_pct(rf_ann)}** from **{rf_code}**")
639
- info_lines.append(f"- Market ERP **{fmt_pct(erp_ann)}**")
640
- info_lines.append(f"- Market σ **{fmt_pct(sigma_mkt)}**")
641
- info_lines.append("")
642
- info_lines.append("### Your portfolio (plotted as CAPM return, historical σ)")
643
- info_lines.append(f"- Beta **{beta_p:.2f}**")
644
- info_lines.append(f"- σ (historical) **{fmt_pct(sigma_p)}**")
645
- info_lines.append(f"- E[return] (CAPM / SML) **{fmt_pct(er_capm_p)}**")
646
- info_lines.append("")
647
- info_lines.append("### Efficient alternatives on CML")
648
- info_lines.append(f"- Same σ → Market **{a_sigma:.2f}**, Bills **{b_sigma:.2f}**, Return **{fmt_pct(mu_eff_sigma)}**")
649
- info_lines.append(f"- Same μ → Market **{a_mu:.2f}**, Bills **{b_mu:.2f}**, σ **{fmt_pct(sigma_eff_mu)}**")
650
- info_lines.append("")
651
- info_lines.append(f"### Dataset-based suggestions (risk: **{risk_bucket}**)")
652
- info_lines.append("Use the selector to flip between **Pick #1 / #2 / #3**. Table shows % exposure and $ amounts.")
653
-
654
- current_idx = 1
655
- current = suggs[current_idx - 1] if suggs else None
656
- current_tbl = current["table"] if current else pd.DataFrame(columns=SUG_COLS)
657
- current_msg = ("Pick #1 — "
658
- f"E[μ] {fmt_pct(current['er_capm'])}, σ {fmt_pct(current['sigma'])}, β {current['beta']:.2f}"
659
- ) if current else "No suggestion."
660
-
661
- return (img,
662
- "\n".join(info_lines),
663
- f"Universe set to {', '.join(universe)}",
664
- pos_table,
665
- suggs[0]["table"] if len(suggs) >= 1 else pd.DataFrame(columns=SUG_COLS),
666
- suggs[1]["table"] if len(suggs) >= 2 else pd.DataFrame(columns=SUG_COLS),
667
- suggs[2]["table"] if len(suggs) >= 3 else pd.DataFrame(columns=SUG_COLS),
668
- eff_same_sigma_tbl,
669
- eff_same_mu_tbl,
670
- json.dumps([{
671
- "er_capm": s["er_capm"], "sigma": s["sigma"], "beta": s["beta"],
672
- } for s in suggs]),
673
- current_idx,
674
- current_msg)
675
 
676
- except Exception as e:
677
- msg = f"⚠️ Compute failed: {type(e).__name__}: {e}"
678
- empty = pd.DataFrame()
679
- return (None, msg, "Error", empty, empty, empty, empty, empty, empty, "[]", 1, msg)
 
 
 
680
 
681
- def on_pick_change(idx: int, meta_json: str):
 
 
682
  try:
683
- data = json.loads(meta_json)
684
  except Exception:
685
- data = []
686
- if not data:
687
- return "No suggestion."
688
- i = int(idx) - 1
689
- i = max(0, min(i, len(data)-1))
690
- s = data[i]
691
- return f"Pick #{i+1} — E[μ] {fmt_pct(s['er_capm'])}, σ {fmt_pct(s['sigma'])}, β {s['beta']:.2f}"
692
-
693
- # =========================
694
- # UI
695
- # =========================
696
- with gr.Blocks(title="Efficient Portfolio Advisor", css="""
697
- #small-note {font-size: 12px; color:#666;}
698
- """) as demo:
699
-
700
- gr.Markdown("## Efficient Portfolio Advisor\n"
701
- "Search symbols, enter **$ amounts**, set your **horizon**. "
702
- "The plot shows your **CAPM expected return** vs **historical σ**, alongside the **CML**. "
703
- "Recommendations are generated from a **synthetic dataset (1000 portfolios)** and ranked with **local embeddings (BGE-base)** for relevance + diversity.")
704
-
705
- with gr.Tab("Build Portfolio"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  with gr.Row():
707
- with gr.Column(scale=1):
708
- q = gr.Textbox(label="Search symbol")
709
- search_note = gr.Markdown(elem_id="small-note")
710
- matches = gr.Dropdown(choices=[], label="Matches", value=None)
711
- search_btn = gr.Button("Search")
712
- add_btn = gr.Button("Add selected to portfolio")
713
-
714
- gr.Markdown("### Positions (enter dollars; negatives allowed for shorts)")
715
- table = gr.Dataframe(
716
- headers=["ticker", "amount_usd"],
717
- datatype=["str", "number"],
718
- row_count=0,
719
- col_count=(2, "fixed"),
720
- wrap=True,
721
- type="pandas" # important for Gradio 5
722
- )
723
-
724
- # Handy sample
725
- sample_btn = gr.Button("Load sample portfolio")
726
-
727
- with gr.Column(scale=1):
728
- horizon = gr.Slider(1, 30, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Investment horizon (years)")
729
- lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback (years) for β and σ")
730
- risk_bucket = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Recommendation risk level")
731
- run_btn = gr.Button("Compute")
732
-
733
- rf_msg = gr.Textbox(label="Risk-free source / status", interactive=False)
734
- search_btn.click(fn=do_search, inputs=q, outputs=[search_note, matches])
735
- add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
736
- table.change(fn=lock_ticker_column, inputs=table, outputs=table)
737
- horizon.change(fn=set_horizon, inputs=horizon, outputs=[rf_msg, gr.State()]) # rf_msg + silent
738
- sample_btn.click(lambda: pd.DataFrame({"ticker": ["AAPL","MSFT","VOO"], "amount_usd": [3000, 2000, 5000]}),
739
- inputs=None, outputs=table)
740
-
741
- with gr.Tab("Results"):
742
  with gr.Row():
743
- with gr.Column(scale=1):
744
- plot = gr.Image(label="Capital Market Line", type="pil")
745
- summary = gr.Markdown()
746
- universe_msg = gr.Textbox(label="Universe status", interactive=False)
747
-
748
- with gr.Column(scale=1):
749
- positions = gr.Dataframe(
750
- label="Computed positions",
751
- headers=POS_COLS,
752
- datatype=["str", "number", "number", "number"],
753
- col_count=(len(POS_COLS), "fixed"),
754
- interactive=False,
755
- type="pandas"
756
- )
757
-
758
- gr.Markdown("### Recommendations (always from embeddings)")
759
- with gr.Row():
760
- sugg1 = gr.Dataframe(label="Pick #1", interactive=False, type="pandas")
761
- sugg2 = gr.Dataframe(label="Pick #2", interactive=False, type="pandas")
762
- sugg3 = gr.Dataframe(label="Pick #3", interactive=False, type="pandas")
763
-
764
- with gr.Row():
765
- pick_idx = gr.Slider(1, 3, value=1, step=1, label="Carousel: show Pick #")
766
- pick_meta = gr.Textbox(value="[]", visible=False)
767
- pick_msg = gr.Markdown("")
768
-
769
- gr.Markdown("### Efficient alternatives on the CML")
770
- eff_same_sigma_tbl = gr.Dataframe(label="Efficient: Same σ", interactive=False, type="pandas")
771
- eff_same_mu_tbl = gr.Dataframe(label="Efficient: Same μ", interactive=False, type="pandas")
772
-
773
- run_btn.click(
774
- fn=compute,
775
- inputs=[lookback, table, risk_bucket, horizon],
776
- outputs=[
777
- plot, summary, universe_msg, positions,
778
- sugg1, sugg2, sugg3,
779
- eff_same_sigma_tbl, eff_same_mu_tbl,
780
- pick_meta, pick_idx, pick_msg
781
- ]
782
- )
783
- pick_idx.change(fn=on_pick_change, inputs=[pick_idx, pick_meta], outputs=pick_msg)
784
-
785
- with gr.Tab("About"):
786
- gr.Markdown(
787
- "### Modality & Model\n"
788
- "- **Modality**: Text (portfolio → text descriptions) powering **embeddings**\n"
789
- "- **Embedding model**: `BAAI/bge-base-en-v1.5` (local, downloaded once; no API)\n\n"
790
- "### Use case\n"
791
- "Given a portfolio, we build a synthetic dataset of 1,000 alternative mixes **using the same tickers**, "
792
- "compute each mix’s **CAPM return, σ, and β**, and rank candidates with embeddings to return **3 diverse, relevant suggestions** "
793
- "for **Low / Medium / High** risk.\n\n"
794
- "### Theory links\n"
795
- "- Portfolio expected return in the plot uses **CAPM (SML)**, while σ is historical.\n"
796
- "- The **CML** and the two **efficient alternatives** (same σ, same μ) use a mix of **Market (VOO)** and **Bills**."
797
- )
798
 
799
  if __name__ == "__main__":
800
- # On HF Spaces you don't need share=True; binding to 0.0.0.0 is enough.
801
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  # app.py
2
+ # Efficient Portfolio Advisor — CAPM-on-CML plot + 1,000-mix dataset + 3x3 suggestions
3
+ # - X axis: historical sigma (from covariances over lookback)
4
+ # - Y axis: CAPM E[r] = rf + beta * ERP
5
+ # - Plot includes two efficient CML mixes: same-σ and same-μ as the user portfolio
6
+ # - Dataset: 1,000 long-only candidate mixes from *current* universe (incl. VOO)
7
+ # - Suggestions: Tabs Low/Medium/High, 3 picks each, chosen by exposure+embedding sim with MMR
8
+ # - Embeddings: FinLang/finance-embeddings-investopedia
9
+ # - Score = α * exposure_similarity + (1-α) * embedding_similarity (α=0.6); MMR λ=0.7
10
+ # - CSV of dataset downloadable.
11
+ import os, io, math, time, json, warnings
12
  warnings.filterwarnings("ignore")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  from typing import List, Tuple, Dict, Optional
15
 
16
  import numpy as np
17
  import pandas as pd
18
  import matplotlib.pyplot as plt
19
  from PIL import Image
 
20
  import requests
21
  import yfinance as yf
22
+ import gradio as gr
23
 
24
+ # ---------------- config ----------------
25
+ DATA_DIR = "data"
26
+ os.makedirs(DATA_DIR, exist_ok=True)
27
 
 
 
 
 
28
  MAX_TICKERS = 30
29
+ DEFAULT_LOOKBACK_YEARS = 10
30
+ MARKET_TICKER = "VOO" # market proxy on CML
31
+ BILLS_LABEL = "Bills" # label for risk-free leg in efficient mixes (display only)
32
+
33
+ SYNTH_ROWS = 1000 # dataset size for suggestions
34
+ EMB_MODEL = "FinLang/finance-embeddings-investopedia"
35
+ ALPHA = 0.60 # exposure-vs-embedding blend
36
+ MMR_LAMBDA = 0.70 # MMR diversity strength
37
+ SHORTLIST_K = 40 # shortlist before MMR per band
38
+
39
+ # Globals updated with horizon changes
40
+ HORIZON_YEARS = 10
41
+ RF_CODE = "DGS10"
42
+ RF_ANN = 0.0375 # initialized at launch
43
+
44
+ # ---------------- helpers ----------------
 
 
 
 
 
 
 
 
 
 
45
def fred_series_for_horizon(years: float) -> str:
    """Map an investment horizon (clamped to 1–100 years) to the nearest
    constant-maturity Treasury series ID on FRED."""
    horizon = max(1.0, min(100.0, float(years)))
    # Ordered maturity ladder: first threshold the horizon fits under wins.
    ladder = ((2, "DGS2"), (3, "DGS3"), (5, "DGS5"),
              (7, "DGS7"), (10, "DGS10"), (20, "DGS20"))
    for limit, series in ladder:
        if horizon <= limit:
            return series
    return "DGS30"
54
 
55
  def fetch_fred_yield_annual(code: str) -> float:
 
56
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
57
  try:
58
  r = requests.get(url, timeout=10)
 
63
  except Exception:
64
  return 0.03
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
    """Download monthly auto-adjusted close prices from Yahoo Finance.

    Parameters:
        tickers: symbols to fetch (deduplicated, upper-cased; order preserved).
        years: lookback window length; a 7-day pad avoids losing the first bar.

    Returns a DataFrame with one column per ticker that actually returned data,
    forward-filled so sporadic missing months do not break return calculations.
    """
    tickers = list(dict.fromkeys([t.upper().strip() for t in tickers]))
    start = (pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)).date()
    end = pd.Timestamp.today(tz="UTC").date()

    df = yf.download(
        tickers,
        start=start, end=end,
        interval="1mo",
        auto_adjust=True,
        actions=False,
        progress=False,
        group_by="column",
        threads=False,
    )

    # Normalize to: columns = tickers, values = prices
    if isinstance(df, pd.Series):
        df = df.to_frame()
    if isinstance(df.columns, pd.MultiIndex):
        lvl0 = [str(x) for x in df.columns.get_level_values(0).unique()]
        if "Close" in lvl0:
            df = df["Close"]
        elif "Adj Close" in lvl0:
            df = df["Adj Close"]
        else:
            # Unknown layout: fall back to the last field level yfinance produced.
            df = df.xs(df.columns.levels[0][-1], axis=1, level=0, drop_level=True)

    cols = [c for c in tickers if c in df.columns]
    # Fix: fillna(method="ffill") is deprecated (removed in pandas 3.x); .ffill()
    # is the supported equivalent.
    out = df[cols].dropna(how="all").ffill()
    return out
97
 
98
def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Simple month-over-month percentage returns; the initial NaN row is dropped."""
    rets = prices.pct_change()
    return rets.dropna()
100
 
101
def yahoo_search(query: str):
    """Query Yahoo Finance's symbol-search endpoint.

    Returns up to 10 strings of the form "SYMBOL | name | exchange". On any
    network/parse failure — or when nothing matches — the raw query is echoed
    back as a typed symbol so the user can still add it manually. An empty or
    blank query returns [].
    """
    if not query or not str(query).strip():
        return []
    endpoint = "https://query1.finance.yahoo.com/v1/finance/search"
    try:
        resp = requests.get(
            endpoint,
            params={"q": query.strip(), "quotesCount": 10, "newsCount": 0},
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        resp.raise_for_status()
        results = []
        for quote in resp.json().get("quotes", []):
            symbol = quote.get("symbol")
            label = quote.get("shortname") or quote.get("longname") or ""
            exchange = quote.get("exchDisp") or ""
            # Skip non-ASCII symbols (downstream code assumes plain tickers).
            if symbol and symbol.isascii():
                results.append(f"{symbol} | {label} | {exchange}")
        if not results:
            results = [f"{query.strip().upper()} | typed symbol | n/a"]
        return results[:10]
    except Exception:
        return [f"{query.strip().upper()} | typed symbol | n/a"]
123
+
124
def validate_tickers(symbols: List[str], years: int) -> List[str]:
    """Keep only symbols that actually return price data, preserving input order.

    Returns [] when the market proxy itself has no data, since CAPM alignment
    is then impossible.
    """
    cleaned = [s for s in dict.fromkeys(t.upper().strip() for t in symbols) if s]
    px = fetch_prices_monthly(cleaned + [MARKET_TICKER], years)
    if MARKET_TICKER not in px.columns:
        return []  # need market for aligned CAPM
    return [s for s in cleaned if s in px.columns]
131
 
132
+ # -------------- aligned moments --------------
 
 
 
 
 
133
def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
    """Monthly returns for `symbols` plus the market proxy, restricted to months
    where every column has data (required for consistent beta/covariance estimates)."""
    assets = [s for s in dict.fromkeys(symbols) if s != MARKET_TICKER]
    px = fetch_prices_monthly(assets + [MARKET_TICKER], years)
    rets = monthly_returns(px)
    keep = [c for c in assets if c in rets.columns]
    if MARKET_TICKER in rets.columns:
        keep.append(MARKET_TICKER)
    aligned = rets[keep].dropna(how="any")
    # Defensive: drop any duplicated columns yfinance may have produced.
    return aligned.loc[:, ~aligned.columns.duplicated()]
141
 
142
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
143
+ R = get_aligned_monthly_returns(symbols, years)
144
+ if MARKET_TICKER not in R.columns or len(R) < 3:
145
+ raise ValueError("Not enough aligned data with market proxy.")
146
  rf_m = rf_ann / 12.0
147
 
148
  m = R[MARKET_TICKER]
149
  if isinstance(m, pd.DataFrame):
150
  m = m.iloc[:, 0].squeeze()
151
 
152
+ mu_m_ann = float(m.mean() * 12.0)
153
+ sigma_m_ann = float(m.std(ddof=1) * math.sqrt(12.0))
154
+ erp_ann = float(mu_m_ann - rf_ann)
155
 
156
  ex_m = m - rf_m
157
  var_m = float(np.var(ex_m.values, ddof=1))
 
163
  cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
164
  betas[s] = cov_sm / var_m
165
 
166
+ betas[MARKET_TICKER] = 1.0
167
 
168
  asset_cols = [c for c in R.columns if c != MARKET_TICKER]
169
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
 
186
  return 0.0, rf_ann, 0.0
187
  w_expo = w / gross
188
  beta_p = float(np.dot([betas.get(t, 0.0) for t in tickers], w_expo))
189
+ mu_capm = capm_er(beta_p, rf_ann, erp_ann)
190
  cov = cov_ann.reindex(index=tickers, columns=tickers).fillna(0.0).to_numpy()
191
+ sigma_hist = float(max(w_expo.T @ cov @ w_expo, 0.0)) ** 0.5
192
+ return beta_p, mu_capm, sigma_hist
193
 
194
+ # -------------- efficient CML mixes --------------
 
 
195
def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
    """CML mix matching a target σ.

    Returns (market_weight, bills_weight, expected_return). With a degenerate
    (zero-σ) market, everything goes into bills at the risk-free rate.
    """
    if sigma_mkt <= 1e-12:
        return 0.0, 1.0, rf_ann
    market_w = sigma_target / sigma_mkt
    expected = rf_ann + market_w * erp_ann
    return market_w, 1.0 - market_w, expected
200
 
201
def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
    """CML mix matching a target expected return.

    Returns (market_weight, bills_weight, sigma) — note the third slot is σ,
    symmetric with efficient_same_sigma returning E[r] in its third slot.
    """
    if abs(erp_ann) <= 1e-12:
        # Bug fix: the degenerate branch used to return rf_ann in the σ slot;
        # the caller unpacks the third value as sigma_eff_mu. An all-bills mix
        # has zero volatility.
        return 0.0, 1.0, 0.0
    market_w = (mu_target - rf_ann) / erp_ann
    return market_w, 1.0 - market_w, abs(market_w) * sigma_mkt
206
 
207
+ # -------------- plotting (CAPM on CML) --------------
208
+ def _pct(x): return np.asarray(x, dtype=float) * 100.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
+ def plot_cml(rf_ann, erp_ann, sigma_mkt,
211
+ sigma_hist, mu_capm,
212
+ sugg_mu=None, sugg_sigma=None) -> Image.Image:
213
+ fig = plt.figure(figsize=(6, 4), dpi=120)
214
 
215
+ xmax = max(0.3, sigma_mkt * 2.2, (sigma_hist or 0.0) * 1.6, (sugg_sigma or 0.0) * 1.6)
216
+ xs = np.linspace(0, xmax, 200)
217
+ cml = rf_ann + (erp_ann / max(sigma_mkt, 1e-9)) * xs
218
 
219
+ plt.plot(_pct(xs), _pct(cml), label="CML via Market", linewidth=1.8)
220
+ plt.scatter([_pct(0)], [_pct(rf_ann)], label="Risk-free")
221
+ plt.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market (VOO)")
222
+ plt.scatter([_pct(sigma_hist)], [_pct(mu_capm)], label="Your CAPM point", marker="o")
223
 
224
+ if sugg_mu is not None and sugg_sigma is not None:
225
+ plt.scatter([_pct(sugg_sigma)], [_pct(sugg_mu)], label="Selected Suggestion", marker="X", s=60)
 
226
 
227
+ plt.xlabel("σ (annualized, %)")
228
+ plt.ylabel("Expected return (annual, %)")
229
+ plt.legend(loc="best")
 
 
 
 
 
 
 
 
230
  plt.tight_layout()
231
 
232
  buf = io.BytesIO()
 
235
  buf.seek(0)
236
  return Image.open(buf)
237
 
238
+ # -------------- synthetic dataset (from current universe) --------------
239
def build_synthetic_dataset(universe: List[str],
                            covA: pd.DataFrame,
                            betas: Dict[str, float],
                            rf_ann: float,
                            erp_ann: float,
                            sigma_mkt: float,
                            n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
    """Generate `n_rows` random long-only candidate mixes from `universe`.

    Each row records the picked tickers, their Dirichlet weights, portfolio
    beta, CAPM expected return, historical σ (from covA), and the CML-equivalent
    σ for that beta. The RNG is seeded so the dataset is reproducible.
    """
    rng = np.random.default_rng(12345)
    U = list(universe)
    if not U:
        U = [MARKET_TICKER]

    rows = []
    for _ in range(n_rows):
        # Bug fix: with a single-name universe, rng.integers(low=2, high=2)
        # raises (low must be < high) and rng.choice(size=2, replace=False)
        # would too — degrade gracefully to a one-asset "mix".
        if len(U) < 2:
            k = 1
        else:
            k = int(rng.integers(low=2, high=min(8, len(U)) + 1))
        picks = list(rng.choice(U, size=k, replace=False))
        w = rng.dirichlet(np.ones(k))  # long-only, sums to 1

        beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
        mu_capm = capm_er(beta_p, rf_ann, erp_ann)

        sub = covA.reindex(index=picks, columns=picks).fillna(0.0).to_numpy()
        sigma_hist = float(max(w.T @ sub @ w, 0.0)) ** 0.5

        # CAPM "equivalent" sigma on CML for the same expected return
        sigma_capm = abs(beta_p) * sigma_mkt

        rows.append({
            "tickers": ",".join(picks),
            "weights": ",".join(f"{x:.6f}" for x in w),
            "beta": beta_p,
            "mu_capm": mu_capm,
            "sigma_hist": sigma_hist,
            "sigma_capm": sigma_capm
        })
    return pd.DataFrame(rows)
275
+
276
+ # -------------- banding by σ (CAPM) --------------
277
+ def _band_bounds(sigma_mkt: float, band: str) -> Tuple[float, float]:
278
+ b = (band or "Medium").strip().lower()
279
+ if b.startswith("low"): return 0.0, 0.8 * sigma_mkt
280
+ if b.startswith("high"): return 1.2 * sigma_mkt, 3.0 * sigma_mkt
281
+ return 0.8 * sigma_mkt, 1.2 * sigma_mkt
282
 
283
def slice_band(df: pd.DataFrame, band: str, sigma_mkt: float) -> pd.DataFrame:
    """Rows whose sigma_capm lies inside the band's interval (inclusive).

    Falls back to a copy of the whole frame when nothing lands in the band, so
    the suggestion pipeline always has candidates to rank.
    """
    lo, hi = _band_bounds(sigma_mkt, band)
    in_band = df.loc[df["sigma_capm"].between(lo, hi)].copy()
    return df.copy() if in_band.empty else in_band
287
+
288
+ # -------------- embeddings + exposure similarity + MMR --------------
289
_embedder = None

def get_embedder():
    """Return the process-wide SentenceTransformer, loading it on first use."""
    global _embedder
    if _embedder is not None:
        return _embedder
    # Deferred import keeps startup fast when embeddings are never requested.
    from sentence_transformers import SentenceTransformer
    _embedder = SentenceTransformer(EMB_MODEL)
    return _embedder
296
 
297
+ def _weights_dict_from_row(r: pd.Series) -> Dict[str, float]:
298
+ ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
299
+ ws = [float(x) for x in str(r["weights"]).split(",")]
300
+ wmap = {ts[i]: ws[i] for i in range(min(len(ts), len(ws)))}
301
+ s = sum(wmap.values()) or 1.0
302
+ return {k: max(0.0, v) / s for k, v in wmap.items()} # ensure long-only normalized
303
+
304
+ def _aligned_vec(universe: List[str], wmap: Dict[str, float]) -> np.ndarray:
305
+ # vector in the same order
306
+ return np.array([float(wmap.get(t, 0.0)) for t in universe], dtype=float)
307
+
308
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity, defined as 0.0 when either vector is all-zero."""
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return float(np.dot(a, b) / denom)
312
+
313
def portfolio_embedding(weights: Dict[str, float]) -> np.ndarray:
    """Unit-norm, weight-averaged ticker embedding for a {ticker: weight} portfolio."""
    model = get_embedder()
    symbols = list(weights.keys())
    if not symbols:
        # No holdings: all-zero vector of the model's embedding size.
        return np.zeros((model.get_sentence_embedding_dimension(),), dtype=float)
    embs = model.encode(symbols, convert_to_numpy=True, normalize_embeddings=True)
    wv = np.array([weights[s] for s in symbols], dtype=float)
    wv = wv / (wv.sum() or 1.0)
    blended = (embs * wv[:, None]).sum(axis=0)
    norm = np.linalg.norm(blended)
    return blended / (norm if norm else 1.0)
325
+
326
def mmr(query_vec: np.ndarray, cand_vecs: np.ndarray, k: int, lam: float) -> List[int]:
    """Maximal Marginal Relevance: greedily select k candidate indices,
    trading off query similarity (weight `lam`) against similarity to
    already-selected items (weight 1 - lam). Assumes rows are comparable
    under dot product (e.g. unit-normalized).
    """
    n = len(cand_vecs)
    if n <= k:
        return list(range(n))
    relevance = cand_vecs @ query_vec
    selected = [int(np.argmax(relevance))]
    remaining = set(range(n)) - set(selected)
    while len(selected) < k and remaining:
        best_idx, best_val = None, -1e9
        for idx in list(remaining):
            # Redundancy = closest similarity to anything already picked.
            redundancy = max(float(cand_vecs[idx] @ cand_vecs[j]) for j in selected)
            val = lam * relevance[idx] - (1.0 - lam) * redundancy
            if val > best_val:
                best_val, best_idx = val, idx
        selected.append(best_idx)
        remaining.remove(best_idx)
    return selected
342
 
343
def pick_3_for_band(synth: pd.DataFrame,
                    band: str,
                    sigma_mkt: float,
                    uni: List[str],
                    user_w: Dict[str, float]) -> Tuple[List[Dict], List[pd.DataFrame]]:
    """Select 3 diverse suggestion mixes for one risk band.

    Pipeline: slice the synthetic dataset to the band's sigma_capm range, keep
    the SHORTLIST_K highest-CAPM-return rows, score each candidate by
    ALPHA * exposure-cosine + (1 - ALPHA) * embedding-cosine against the user's
    portfolio, then run MMR (lam = MMR_LAMBDA) on the top 12 to pick 3 rows.

    Returns (metas, tbls): metas is a list of {"mu", "sigma"} dicts used for
    plot highlighting; tbls are per-pick DataFrames with ticker/weight_%/amount_$.
    """
    # shortlist by top CAPM returns within band
    band_df = slice_band(synth, band, sigma_mkt)
    band_df = band_df.sort_values("mu_capm", ascending=False).head(SHORTLIST_K).reset_index(drop=True)
    if band_df.empty:
        return [], []

    # exposure vectors
    user_vec = _aligned_vec(uni, user_w)

    # portfolio embedding
    q_emb = portfolio_embedding(user_w)

    # candidate embeddings (weighted avg of ticker embeddings)
    c_wmaps = [ _weights_dict_from_row(r) for _, r in band_df.iterrows() ]
    toks_list = [list(wm.keys()) for wm in c_wmaps]
    # flatten encode unique tokens once — a single model.encode call covers all candidates
    tok_set = sorted(set(t for toks in toks_list for t in toks))
    model = get_embedder()
    tok_embs = model.encode(tok_set, convert_to_numpy=True, normalize_embeddings=True)
    tok_idx = {t:i for i,t in enumerate(tok_set)}

    cand_vecs = []
    expo_sims = []
    for wm in c_wmaps:
        # exposure sim (cosine on aligned vectors)
        c_vec = _aligned_vec(uni, wm)
        expo_sims.append(cosine_sim(user_vec, c_vec))
        # weighted-avg ticker embedding, re-normalized to unit length
        if wm:
            w = np.array([wm[t] for t in wm.keys()], dtype=float)
            w = w / (w.sum() or 1.0)
            e = np.vstack([tok_embs[tok_idx[t]] for t in wm.keys()])
            v = (e * w[:,None]).sum(axis=0)
            v = v / (np.linalg.norm(v) or 1.0)
            cand_vecs.append(v)
        else:
            cand_vecs.append(np.zeros_like(tok_embs[0]))

    cand_vecs = np.vstack(cand_vecs)
    # embedding sim: dot with q_emb (already normalized)
    emb_sims = cand_vecs @ q_emb

    # blended score
    scores = ALPHA * np.array(expo_sims) + (1.0 - ALPHA) * np.array(emb_sims)
    short_idx = np.argsort(-scores)[:min(12, len(scores))]

    # MMR on the short list to get 3 diverse
    mmr_idx_local = mmr(q_emb, cand_vecs[short_idx], k=3, lam=MMR_LAMBDA)
    chosen = [int(short_idx[i]) for i in mmr_idx_local]
    picks = band_df.iloc[chosen].reset_index(drop=True)

    # tables (% and $) for each pick
    # NOTE(review): compute() passes normalized weights as user_w, so gross_amt
    # is 1.0 and amount_$ equals the weight — confirm dollar amounts were intended.
    gross_amt = sum(abs(v) for v in user_w.values()) or 1.0
    tbls = []
    metas = []
    for _, r in picks.iterrows():
        wm = _weights_dict_from_row(r)
        rows = [{"ticker": t, "weight_%": round(w*100.0, 2), "amount_$": round(w*gross_amt, 2)} for t, w in wm.items()]
        df = pd.DataFrame(rows, columns=["ticker", "weight_%", "amount_$"]).sort_values("weight_%", ascending=False)
        tbls.append(df.reset_index(drop=True))
        metas.append({"mu": float(r["mu_capm"]), "sigma": float(r["sigma_capm"])})
    return metas, tbls
410
+
411
+ # -------------- UI helpers --------------
412
def empty_positions_df():
    """Blank positions table carrying the canonical column layout."""
    columns = ["ticker", "amount_usd", "weight_exposure", "beta"]
    return pd.DataFrame(columns=columns)
414
+
415
def empty_suggestion_df():
    """Blank suggestion table carrying the canonical column layout."""
    columns = ["ticker", "weight_%", "amount_$"]
    return pd.DataFrame(columns=columns)
417
 
418
def set_horizon(years: float):
    """Clamp the horizon to [1, 100] years, refresh the risk-free globals
    (HORIZON_YEARS, RF_CODE, RF_ANN) from FRED, and return a status message."""
    global HORIZON_YEARS, RF_CODE, RF_ANN
    clamped = max(1.0, min(100.0, float(years)))
    series = fred_series_for_horizon(clamped)
    rate = fetch_fred_yield_annual(series)
    HORIZON_YEARS, RF_CODE, RF_ANN = clamped, series, rate
    return f"Risk-free series {series}. Latest annual rate {rate:.2%}."
427
 
428
def search_tickers_cb(q: str):
    """Gradio callback: run a symbol search and refresh the Matches dropdown."""
    options = yahoo_search(q)
    hint = "Select a symbol and click 'Add selected to portfolio'." if options else "No matches."
    return hint, gr.update(choices=options, value=None)
432
 
433
def add_symbol(selection: str, table: Optional[pd.DataFrame]):
    """Gradio callback: validate and append the selected search result to the
    positions table.

    Existing dollar amounts are preserved; symbols that fail price validation
    are dropped; the table is capped at MAX_TICKERS rows. Returns the updated
    table plus a status message.
    """
    if not selection:
        fallback = table if isinstance(table, pd.DataFrame) else pd.DataFrame(columns=["ticker","amount_usd"])
        return fallback, "Pick a row in Matches first."

    symbol = selection.split("|")[0].strip().upper()
    has_rows = isinstance(table, pd.DataFrame) and not table.empty

    existing = []
    if has_rows:
        existing = [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
    wanted = existing if symbol in existing else existing + [symbol]

    # Drop anything Yahoo cannot price over the default lookback.
    valid = validate_tickers(wanted, years=DEFAULT_LOOKBACK_YEARS)
    wanted = [t for t in wanted if t in valid]

    amounts = {}
    if has_rows:
        for _, row in table.iterrows():
            tick = str(row.get("ticker", "")).upper()
            if tick in wanted:
                amounts[tick] = float(pd.to_numeric(row.get("amount_usd", 0.0), errors="coerce") or 0.0)

    result = pd.DataFrame({"ticker": wanted, "amount_usd": [amounts.get(t, 0.0) for t in wanted]})
    if len(result) > MAX_TICKERS:
        return result.iloc[:MAX_TICKERS], f"Reached max of {MAX_TICKERS}."
    return result, f"Added {symbol}."
458
 
459
+ def lock_ticker_column(tb: Optional[pd.DataFrame]):
460
+ if not isinstance(tb, pd.DataFrame) or tb.empty:
461
  return pd.DataFrame(columns=["ticker", "amount_usd"])
462
  tickers = [str(x).upper() for x in tb["ticker"].tolist()]
463
  amounts = pd.to_numeric(tb["amount_usd"], errors="coerce").fillna(0.0).tolist()
 
466
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
467
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
468
 
469
+ # -------------- main compute --------------
470
+ UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
471
+
472
def compute(
    years_lookback: int,
    table: Optional[pd.DataFrame],
    pick_low: int,
    pick_med: int,
    pick_high: int
):
    """End-to-end pipeline: sanitize inputs, estimate CAPM moments, build the
    synthetic dataset, rank 3 picks per risk band, and render the CML plot.

    Always returns exactly 17 values, matching run_btn's wired outputs:
    plot, summary, universe_msg, positions,
    3 Low picks, 3 Medium picks, 3 High picks,
    efficient same-σ table, efficient same-μ table,
    meta JSON string, dataset CSV path (or None).
    """
    def _error(msg: str, uni_note: str):
        # Bug fix: error paths previously returned only 12 values against the
        # 17 outputs wired to run_btn (and put the error string into the
        # gr.File slot), so Gradio failed on every early exit.
        empties = [empty_suggestion_df() for _ in range(11)]  # 9 picks + 2 efficient tables
        return (None, msg, uni_note, empty_positions_df(), *empties, json.dumps({}), None)

    # sanitize table
    if isinstance(table, pd.DataFrame):
        df = table.copy()
    else:
        df = pd.DataFrame(columns=["ticker", "amount_usd"])
    df = df.dropna(how="all")
    for col in ("ticker", "amount_usd"):
        if col not in df.columns:
            df[col] = []
    df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
    df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)

    symbols = [t for t in df["ticker"].tolist() if t]
    if len(symbols) == 0:
        return _error("Add at least one ticker.", "Universe empty.")

    symbols = validate_tickers(symbols, years_lookback)
    if len(symbols) == 0:
        return _error("Could not validate any tickers.", "Universe invalid.")

    # Refresh the module-level universe (market proxy always included).
    global UNIVERSE
    UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]

    df = df[df["ticker"].isin(symbols)].copy()
    amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
    gross = sum(abs(v) for v in amounts.values())
    if gross <= 1e-12:
        return _error("All amounts are zero.", "Universe ok.")

    weights = {k: v / gross for k, v in amounts.items()}
    rf_ann = RF_ANN

    # Moments: betas/covariances aligned on common months with the market proxy
    moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
    betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]

    # Portfolio CAPM stats (Y) vs historical σ (X)
    beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
    sigma_capm = abs(beta_p) * sigma_mkt  # for info only

    # Efficient alternatives on CML
    a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
    a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)

    # Dataset (SYNTH_ROWS mixes) and save CSV — best effort, suggestions work without it
    synth = build_synthetic_dataset(UNIVERSE, covA, betas, rf_ann, erp_ann, sigma_mkt, n_rows=SYNTH_ROWS)
    csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
    try:
        synth.to_csv(csv_path, index=False)
    except Exception:
        csv_path = None

    # Picks per band (Low/Medium/High)
    meta_low, tbls_low = pick_3_for_band(synth, "Low", sigma_mkt, UNIVERSE, weights)
    meta_med, tbls_med = pick_3_for_band(synth, "Medium", sigma_mkt, UNIVERSE, weights)
    meta_high, tbls_high = pick_3_for_band(synth, "High", sigma_mkt, UNIVERSE, weights)

    # Pad any band that produced fewer than 3 picks with the user's own point.
    def ensure_three(meta, tbls):
        while len(meta) < 3:
            meta.append({"mu": mu_capm, "sigma": sigma_capm})
            tbls.append(empty_suggestion_df())
        return meta[:3], tbls[:3]

    meta_low, tbls_low = ensure_three(meta_low, tbls_low)
    meta_med, tbls_med = ensure_three(meta_med, tbls_med)
    meta_high, tbls_high = ensure_three(meta_high, tbls_high)

    # clamp pick indices to 1..3
    pick_low = int(max(1, min(3, pick_low or 1)))
    pick_med = int(max(1, min(3, pick_med or 1)))
    pick_high = int(max(1, min(3, pick_high or 1)))

    # default highlighted suggestion: Medium / chosen index
    sel = meta_med[pick_med - 1]
    img = plot_cml(rf_ann, erp_ann, sigma_mkt, sigma_hist, mu_capm, sel["mu"], sel["sigma"])

    # positions table (computed)
    pos_table = pd.DataFrame(
        [{
            "ticker": t,
            "amount_usd": amounts.get(t, 0.0),
            "weight_exposure": weights.get(t, 0.0),
            "beta": 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
        } for t in symbols],
        columns=["ticker", "amount_usd", "weight_exposure", "beta"]
    )

    # efficient mixes tables (display-only)
    eff_same_sigma_tbl = pd.DataFrame([
        {"ticker": MARKET_TICKER, "weight_%": round(a_sigma*100, 2), "amount_$": round(a_sigma*gross, 2)},
        {"ticker": BILLS_LABEL, "weight_%": round(b_sigma*100, 2), "amount_$": round(b_sigma*gross, 2)},
    ])
    eff_same_mu_tbl = pd.DataFrame([
        {"ticker": MARKET_TICKER, "weight_%": round(a_mu*100, 2), "amount_$": round(a_mu*gross, 2)},
        {"ticker": BILLS_LABEL, "weight_%": round(b_mu*100, 2), "amount_$": round(b_mu*gross, 2)},
    ])

    # info summary
    info = "\n".join([
        "### Inputs",
        f"- Lookback years {years_lookback}",
        f"- Horizon years {int(round(HORIZON_YEARS))}",
        f"- Risk-free {rf_ann:.2%} from {RF_CODE}",
        f"- Market ERP {erp_ann:.2%}",
        f"- Market σ {sigma_mkt:.2%}",
        "",
        "### Your portfolio (CAPM on CML plot)",
        f"- Beta {beta_p:.2f}",
        f"- Expected return (CAPM / SML) {mu_capm:.2%}",
        f"- σ (historical) {sigma_hist:.2%}",
        "",
        "### Efficient alternatives on CML",
        f"- Same σ: Market {a_sigma:.2f}, Bills {b_sigma:.2f}, E[r] {mu_eff_sigma:.2%}",
        f"- Same μ: Market {a_mu:.2f}, Bills {b_mu:.2f}, σ {sigma_eff_mu:.2%}",
        "",
        "### Suggestions",
        "Three tabs (Low/Medium/High). Select a pick to highlight it on the plot.",
        "_Plot is **always** CAPM E[r] vs historical σ; your CAPM point will never exceed the CML._"
    ])

    # pack suggestion meta for quick plot refresh on band selection
    meta = {
        "low": meta_low,
        "med": meta_med,
        "high": meta_high,
        "plot": {"rf": rf_ann, "erp": erp_ann, "sigma_mkt": sigma_mkt, "sigma_hist": sigma_hist, "mu_capm": mu_capm}
    }

    uni_msg = f"Universe set to: {', '.join(UNIVERSE)}"

    # Bug fix: pass csv_path (possibly None) rather than "" — an empty string
    # is not a valid file path for the gr.File output.
    return (
        img, info, uni_msg, pos_table,
        tbls_low[0], tbls_low[1], tbls_low[2],
        tbls_med[0], tbls_med[1], tbls_med[2],
        tbls_high[0], tbls_high[1], tbls_high[2],
        eff_same_sigma_tbl, eff_same_mu_tbl,
        json.dumps(meta), csv_path
    )
628
+
629
def highlight_from_pick(meta_json: str, band: str, pick_idx: int):
    """Re-render the CML plot with the selected suggestion highlighted.

    Returns None (no plot update) if the meta payload is missing or malformed,
    e.g. before the first Compute run.
    """
    try:
        meta = json.loads(meta_json)
        plotp = meta.get("plot", {})
        rf = float(plotp["rf"])
        erp = float(plotp["erp"])
        sigma_mkt = float(plotp["sigma_mkt"])
        sigma_hist = float(plotp["sigma_hist"])
        mu_capm = float(plotp["mu_capm"])
        band_key = {"Low": "low", "Medium": "med"}.get(band, "high")
        picks = meta[band_key]
        sel = picks[int(max(1, min(3, pick_idx or 1))) - 1]
        return plot_cml(rf, erp, sigma_mkt, sigma_hist, mu_capm, sel["mu"], sel["sigma"])
    except Exception:
        # if anything fails, fall back to no suggestion highlighted
        return None
642
+
643
+ # -------------- UI --------------
644
def clamp13(i: int):
    """Clamp a pick index into the inclusive range 1..3 (None/0 map to 1)."""
    value = int(i or 1)
    return min(3, max(1, value))
645
+
646
# Top-level Gradio UI: all components and event wiring live in this Blocks context.
with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
    gr.Markdown(
        "## Efficient Portfolio Advisor\n"
        "Search symbols, enter **dollar amounts**, set horizon. Data uses Yahoo monthly prices; risk-free from FRED.\n\n"
        "**Plot:** CAPM E[r] vs historical σ on the **CML**.\n"
        "**Efficient mixes:** CML portfolio with **same σ** and CML portfolio with **same E[r]** as yours.\n"
        "**Suggestions:** 1,000 long-only mixes from your universe → 3 picks per risk band using exposure+embeddings with MMR diversity."
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: symbol search, positions entry, horizon/lookback controls.
            q = gr.Textbox(label="Search symbol")
            search_note = gr.Markdown()
            matches = gr.Dropdown(choices=[], label="Matches")
            search_btn = gr.Button("Search")
            add_btn = gr.Button("Add selected to portfolio")

            gr.Markdown("### Portfolio positions (enter $ amounts; negatives allowed for your input)")
            table = gr.Dataframe(
                headers=["ticker", "amount_usd"],
                datatype=["str", "number"],
                row_count=0,
                col_count=(2, "fixed"),
                type="pandas"  # Gradio 5-friendly
            )

            horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
            lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for betas & covariances")

            run_btn = gr.Button("Compute (build dataset & suggest)")
        with gr.Column(scale=1):
            # Right column: plot, summary text, and universe/horizon status.
            plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
            summary = gr.Markdown(label="Inputs & Results")
            universe_msg = gr.Textbox(label="Universe status / Horizon", interactive=False)

    positions = gr.Dataframe(
        label="Computed positions",
        headers=["ticker", "amount_usd", "weight_exposure", "beta"],
        datatype=["str", "number", "number", "number"],
        col_count=(4, "fixed"),
        value=empty_positions_df(),
        interactive=False,
        type="pandas"
    )

    # Suggestions area: three tabs, each 3 picks
    meta_box = gr.Textbox(value="{}", visible=False, label="meta")  # hidden JSON channel for plot refresh
    csv_path = gr.File(label="Generated dataset CSV", value=None, visible=True)

    with gr.Tab("Low"):
        with gr.Row():
            low1 = gr.Dataframe(label="Pick #1", interactive=False, type="pandas")
            low2 = gr.Dataframe(label="Pick #2", interactive=False, type="pandas")
            low3 = gr.Dataframe(label="Pick #3", interactive=False, type="pandas")
        pick_low = gr.Slider(1, 3, value=1, step=1, label="Highlight pick")
        low_btn = gr.Button("Show on plot")

    with gr.Tab("Medium"):
        with gr.Row():
            med1 = gr.Dataframe(label="Pick #1", interactive=False, type="pandas")
            med2 = gr.Dataframe(label="Pick #2", interactive=False, type="pandas")
            med3 = gr.Dataframe(label="Pick #3", interactive=False, type="pandas")
        pick_med = gr.Slider(1, 3, value=1, step=1, label="Highlight pick")
        med_btn = gr.Button("Show on plot")

    with gr.Tab("High"):
        with gr.Row():
            high1 = gr.Dataframe(label="Pick #1", interactive=False, type="pandas")
            high2 = gr.Dataframe(label="Pick #2", interactive=False, type="pandas")
            high3 = gr.Dataframe(label="Pick #3", interactive=False, type="pandas")
        pick_high = gr.Slider(1, 3, value=1, step=1, label="Highlight pick")
        high_btn = gr.Button("Show on plot")

    gr.Markdown("### Efficient alternatives on the CML")
    eff_same_sigma_tbl = gr.Dataframe(label="Efficient: Same σ", interactive=False, type="pandas")
    eff_same_mu_tbl = gr.Dataframe(label="Efficient: Same μ", interactive=False, type="pandas")

    # wire search / add / locking / horizon
    search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
    add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
    table.change(fn=lock_ticker_column, inputs=table, outputs=table)
    horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)

    # main compute
    # NOTE(review): the three gr.State(1) placeholders mean compute always sees
    # pick indices of 1; the per-tab sliders only drive the highlight buttons below.
    run_btn.click(
        fn=compute,
        inputs=[lookback, table, gr.State(1), gr.State(1), gr.State(1)],
        outputs=[
            plot, summary, universe_msg, positions,
            low1, low2, low3,
            med1, med2, med3,
            high1, high2, high3,
            eff_same_sigma_tbl, eff_same_mu_tbl,
            meta_box, csv_path
        ]
    )

    # highlight buttons refresh plot with selected suggestion
    low_btn.click(fn=highlight_from_pick, inputs=[meta_box, gr.State("Low"), pick_low], outputs=plot)
    med_btn.click(fn=highlight_from_pick, inputs=[meta_box, gr.State("Medium"), pick_med], outputs=plot)
    high_btn.click(fn=highlight_from_pick, inputs=[meta_box, gr.State("High"), pick_high], outputs=plot)

    # initialize risk-free at launch
    RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
    RF_ANN = fetch_fred_yield_annual(RF_CODE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
 
752
if __name__ == "__main__":
    # On Hugging Face Spaces you don't need share=True; binding to 0.0.0.0 is fine
    # (7860 is the port Spaces expects the app to listen on).
    demo.launch(server_name="0.0.0.0", server_port=7860)