Tulitula committed on
Commit
4c6ebab
·
verified ·
1 Parent(s): bbc558b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +320 -322
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- import os, io, math, time, warnings, json, random
3
  warnings.filterwarnings("ignore")
4
 
5
  from typing import List, Tuple, Dict, Optional
@@ -12,24 +12,31 @@ import requests
12
  import yfinance as yf
13
  import gradio as gr
14
 
 
 
 
 
 
 
 
 
 
 
 
15
  # ---------------- config ----------------
16
  DATA_DIR = "data"
17
  os.makedirs(DATA_DIR, exist_ok=True)
18
 
19
  MAX_TICKERS = 30
20
  DEFAULT_LOOKBACK_YEARS = 10
 
21
 
22
- # Market proxy used only for CAPM/CML math. We never add it to your portfolio or suggestions.
23
- MARKET_PROXY = "VOO"
24
-
25
- SYNTH_ROWS = 1000 # size of generated dataset for suggestions
26
- EMBED_ALPHA = 0.6 # exposure-sim weight in score (1-alpha uses text embeddings)
27
- MMR_LAMBDA = 0.7 # diversity for MMR (higher favors quality over diversity)
28
 
29
  # Globals that update with horizon changes
30
  HORIZON_YEARS = 10
31
  RF_CODE = "DGS10"
32
- RF_ANN = 0.0375 # updated at launch
33
 
34
  # ---------------- helpers ----------------
35
  def fred_series_for_horizon(years: float) -> str:
@@ -72,6 +79,7 @@ def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
72
 
73
  if isinstance(df, pd.Series):
74
  df = df.to_frame()
 
75
  if isinstance(df.columns, pd.MultiIndex):
76
  lvl0 = [str(x) for x in df.columns.get_level_values(0).unique()]
77
  if "Close" in lvl0:
@@ -113,31 +121,29 @@ def yahoo_search(query: str):
113
 
114
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
115
  base = [s for s in dict.fromkeys([t.upper().strip() for t in symbols]) if s]
116
- # We fetch base + MARKET_PROXY only to compute CAPM, but we don't add MARKET_PROXY to suggestions.
117
- px = fetch_prices_monthly(base + [MARKET_PROXY], years)
118
  ok = [s for s in base if s in px.columns]
119
- # Need market proxy data available; otherwise we cannot compute β/ERP/CML.
120
- if MARKET_PROXY not in px.columns:
121
- return [] # cannot proceed without market series
122
  return ok
123
 
124
- # -------------- aligned moments (vs market proxy) --------------
125
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
126
- uniq = [c for c in dict.fromkeys(symbols) if c != MARKET_PROXY]
127
- tickers = uniq + [MARKET_PROXY]
128
  px = fetch_prices_monthly(tickers, years)
129
  rets = monthly_returns(px)
130
- cols = [c for c in uniq if c in rets.columns] + ([MARKET_PROXY] if MARKET_PROXY in rets.columns else [])
131
  R = rets[cols].dropna(how="any")
132
  return R.loc[:, ~R.columns.duplicated()]
133
 
134
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
135
  R = get_aligned_monthly_returns(symbols, years)
136
- if MARKET_PROXY not in R.columns or len(R) < 3:
137
  raise ValueError("Not enough aligned data with market proxy.")
138
- rf_m = rf_ann / 12.0
139
 
140
- m = R[MARKET_PROXY]
141
  if isinstance(m, pd.DataFrame):
142
  m = m.iloc[:, 0].squeeze()
143
 
@@ -145,17 +151,19 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
145
  sigma_m_ann = float(m.std(ddof=1) * math.sqrt(12.0))
146
  erp_ann = float(mu_m_ann - rf_ann)
147
 
 
148
  ex_m = m - rf_m
149
  var_m = float(np.var(ex_m.values, ddof=1))
150
  var_m = max(var_m, 1e-9)
151
 
152
  betas: Dict[str, float] = {}
153
- for s in [c for c in R.columns if c != MARKET_PROXY]:
154
  ex_s = R[s] - rf_m
155
  cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
156
  betas[s] = cov_sm / var_m
 
157
 
158
- asset_cols = [c for c in R.columns if c != MARKET_PROXY]
159
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
160
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
161
 
@@ -182,6 +190,7 @@ def portfolio_stats(weights: Dict[str, float],
182
  return beta_p, mu_capm, sigma_hist
183
 
184
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
 
185
  if sigma_mkt <= 1e-12:
186
  return 0.0, 1.0, rf_ann
187
  a = sigma_target / sigma_mkt
@@ -193,14 +202,9 @@ def efficient_same_return(mu_target: float, rf_ann: float, erp_ann: float, sigma
193
  a = (mu_target - rf_ann) / erp_ann
194
  return a, 1.0 - a, abs(a) * sigma_mkt
195
 
196
- # -------------- plotting (CAPM on CML) --------------
197
- def _pct(x): return np.asarray(x, dtype=float) * 100.0
198
-
199
- def _clamp_to_cml_y(mu_capm, sigma_hist, rf_ann, erp_ann, sigma_mkt):
200
- # Return y that never exceeds CML at given (historical) sigma
201
- slope = erp_ann / max(sigma_mkt, 1e-12)
202
- y_cml = rf_ann + slope * max(0.0, float(sigma_hist))
203
- return float(min(mu_capm, y_cml))
204
 
205
  def plot_cml(rf_ann, erp_ann, sigma_mkt,
206
  sigma_hist_p, mu_capm_p,
@@ -211,30 +215,27 @@ def plot_cml(rf_ann, erp_ann, sigma_mkt,
211
 
212
  xmax = max(0.3, sigma_mkt * 2.4, (sigma_hist_p or 0.0) * 1.6, (sugg_sigma_hist or 0.0) * 1.6)
213
  xs = np.linspace(0, xmax, 200)
214
- cml = rf_ann + (erp_ann / max(sigma_mkt, 1e-9)) * xs
 
215
 
216
  plt.plot(_pct(xs), _pct(cml), label="CML (Market/Bills)", linewidth=1.8)
217
  plt.scatter([_pct(0)], [_pct(rf_ann)], label="Risk-free")
218
  plt.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market")
219
 
220
- # Your CAPM point (y clamped under CML; x = historical σ)
221
- y_you = _clamp_to_cml_y(mu_capm_p, sigma_hist_p, rf_ann, erp_ann, sigma_mkt)
 
222
  plt.scatter([_pct(sigma_hist_p)], [_pct(y_you)], label="Your CAPM point")
223
 
224
- # Efficient points
225
- plt.scatter([_pct(same_mu_sigma)], [_pct(same_sigma_mu)], label="Efficient (same σ)", marker="^")
226
- plt.scatter([_pct(same_mu_sigma)], [_pct(same_sigma_mu)], marker="^") # ensure visible
227
-
228
- plt.scatter([_pct(same_mu_sigma)], [_pct(same_sigma_mu)], marker="^")
229
-
230
- a_mu_sigma = same_mu_sigma
231
- a_sigma_mu = same_sigma_mu
232
- plt.scatter([_pct(a_mu_sigma)], [_pct(a_sigma_mu)], marker="^", label="Efficient (same E[r])")
233
 
234
- # Selected suggestion (if any)
235
  if sugg_sigma_hist is not None and sugg_mu_capm is not None:
236
- y_s = _clamp_to_cml_y(sugg_mu_capm, sugg_sigma_hist, rf_ann, erp_ann, sigma_mkt)
237
- plt.scatter([_pct(sugg_sigma_hist)], [_pct(y_s)], label="Selected Suggestion", marker="X", s=60)
 
238
 
239
  plt.xlabel("σ (historical, annualized, %)")
240
  plt.ylabel("CAPM E[r] (annual, %)")
@@ -247,24 +248,30 @@ def plot_cml(rf_ann, erp_ann, sigma_mkt,
247
  buf.seek(0)
248
  return Image.open(buf)
249
 
250
- # -------------- synthetic dataset (from current universe only) --------------
251
- def build_synthetic_dataset(universe: List[str],
252
  covA: pd.DataFrame,
253
  betas: Dict[str, float],
254
  rf_ann: float,
255
  erp_ann: float,
256
  sigma_mkt: float,
257
  n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
 
 
 
 
258
  rng = np.random.default_rng(12345)
259
- assets = list(universe)
260
  if not assets:
261
- return pd.DataFrame(columns=["tickers","weights","beta","mu_capm","sigma_hist"])
 
 
262
 
263
  rows = []
264
  for _ in range(n_rows):
265
- k = int(rng.integers(low=2, high=min(8, len(assets)) + 1))
266
  picks = list(rng.choice(assets, size=k, replace=False))
267
- w = rng.dirichlet(np.ones(k)) # long-only, sum=1
268
  beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
269
  mu_capm = capm_er(beta_p, rf_ann, erp_ann)
270
  sub = covA.reindex(index=picks, columns=picks).fillna(0.0).to_numpy()
@@ -272,15 +279,14 @@ def build_synthetic_dataset(universe: List[str],
272
 
273
  rows.append({
274
  "tickers": ",".join(picks),
275
- "weights": ",".join(f"{x:.8f}" for x in w),
276
  "beta": beta_p,
277
  "mu_capm": mu_capm,
278
  "sigma_hist": sigma_hist
279
  })
280
  return pd.DataFrame(rows)
281
 
282
- # ---- band helpers (by historical sigma) ----
283
- def _band_bounds_sigma_hist(sigma_mkt: float, band: str) -> Tuple[float, float]:
284
  band = (band or "Medium").strip().lower()
285
  if band.startswith("low"):
286
  return 0.0, 0.8 * sigma_mkt
@@ -288,135 +294,92 @@ def _band_bounds_sigma_hist(sigma_mkt: float, band: str) -> Tuple[float, float]:
288
  return 1.2 * sigma_mkt, 3.0 * sigma_mkt
289
  return 0.8 * sigma_mkt, 1.2 * sigma_mkt
290
 
291
- def candidates_for_band(synth: pd.DataFrame, sigma_mkt: float, band: str):
292
- """Return (band_df, used_fallback) ensuring Low/Medium/High are monotone in σ."""
293
- lo, hi = _band_bounds_sigma_hist(sigma_mkt, band)
294
- band_df = synth[(synth["sigma_hist"] >= lo) & (synth["sigma_hist"] <= hi)]
295
- if not band_df.empty:
296
- return band_df.copy(), False
297
-
298
- # widen gradually
299
- widen = 0.15
300
- for _ in range(4):
301
- lo = max(0.0, lo * (1.0 - widen))
302
- hi = hi * (1.0 + widen)
303
- band_df = synth[(synth["sigma_hist"] >= lo) & (synth["sigma_hist"] <= hi)]
304
- if not band_df.empty:
305
- return band_df.copy(), True
306
-
307
- # quantile fallback
308
- q1 = synth["sigma_hist"].quantile(0.33)
309
- q2 = synth["sigma_hist"].quantile(0.66)
310
- b = (band or "medium").lower()
311
- if b.startswith("low"):
312
- band_df = synth[synth["sigma_hist"] <= q1]
313
- elif b.startswith("high"):
314
- band_df = synth[synth["sigma_hist"] >= q2]
315
- else:
316
- band_df = synth[(synth["sigma_hist"] > q1) & (synth["sigma_hist"] < q2)]
317
- return band_df.copy(), True
318
-
319
- # -------------- Embeddings & scoring (always on; fail gracefully) --------------
320
- def _load_st_model():
321
  try:
322
  from sentence_transformers import SentenceTransformer
323
- return SentenceTransformer("FinLang/finance-embeddings-investopedia")
 
 
 
 
 
 
 
324
  except Exception:
325
- return None
326
-
327
- def _encode(model, texts: List[str]) -> np.ndarray:
328
- if model is None:
329
- # Offline fallback: deterministic pseudo-embeddings from a fixed-seed RNG (not derived from the text content)
330
- rng = np.random.default_rng(42)
331
- return rng.normal(size=(len(texts), 384)).astype(np.float32)
332
- vecs = model.encode(texts, show_progress_bar=False, normalize_embeddings=True)
333
- return np.asarray(vecs, dtype=np.float32)
334
-
335
- def _portfolio_embedding(tickers: List[str], weights: List[float]) -> np.ndarray:
336
- model = _load_st_model()
337
- texts = [f"ticker {t}" for t in tickers]
338
- embs = _encode(model, texts)
339
- w = np.asarray(weights, dtype=float)
340
- s = np.sum(np.abs(w)) or 1.0
341
- w = np.abs(w) / s
342
- v = (embs * w[:, None]).sum(axis=0, keepdims=False)
343
- n = np.linalg.norm(v) or 1.0
344
- return (v / n).astype(np.float32)
345
-
346
- def _cos_sim(a: np.ndarray, b: np.ndarray) -> float:
347
- da = float(np.linalg.norm(a)); db = float(np.linalg.norm(b))
348
- if da <= 1e-12 or db <= 1e-12: return 0.0
349
- return float(np.dot(a, b) / (da * db))
350
-
351
- def _exposure_similarity(u: Dict[str, float], c: Dict[str, float]) -> float:
352
- # 1 - 0.5*L1 distance over common union; in [0,1]
353
- keys = sorted(set(u.keys()) | set(c.keys()))
354
- uvec = np.array([u.get(k, 0.0) for k in keys]); uvec = np.abs(uvec) / (np.sum(np.abs(uvec)) or 1.0)
355
- cvec = np.array([c.get(k, 0.0) for k in keys]); cvec = np.abs(cvec) / (np.sum(np.abs(cvec)) or 1.0)
356
- dist = float(np.sum(np.abs(uvec - cvec)))
357
- return float(max(0.0, 1.0 - 0.5 * dist))
358
-
359
- def _mmr_select(rows: pd.DataFrame, scores: np.ndarray, topk: int, lam: float) -> List[int]:
360
- if len(rows) <= topk:
361
- return list(range(len(rows)))
362
- chosen = []
363
- cand = list(range(len(rows)))
364
- sims_cache = {}
365
- # precompute embeddings of candidates for diversity
366
- embs = []
367
- for _, r in rows.iterrows():
368
- ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
369
- ws = [float(x) for x in str(r["weights"]).split(",")]
370
- embs.append(_portfolio_embedding(ts, ws))
371
- embs = np.stack(embs, axis=0)
372
-
373
- while len(chosen) < topk and cand:
374
- # pick argmax of lam*score - (1-lam)*max_sim_to_chosen
375
- best_i = None; best_val = -1e9
376
- for i in cand:
377
- if not chosen:
378
- val = float(scores[i])
379
- else:
380
- max_sim = max(_cos_sim(embs[i], embs[j]) for j in chosen)
381
- val = lam * float(scores[i]) - (1.0 - lam) * float(max_sim)
382
- if val > best_val:
383
- best_val, best_i = val, i
384
- chosen.append(best_i)
385
- cand.remove(best_i)
386
- return chosen
387
-
388
- def pick_best_in_band(user_df: pd.DataFrame,
389
- band_df: pd.DataFrame,
390
- alpha: float = EMBED_ALPHA,
391
- top_N: int = 50) -> pd.Series:
392
- if band_df.empty:
393
- return pd.Series(dtype="float64")
394
- try:
395
- band_df = band_df.sort_values("mu_capm", ascending=False).head(top_N).reset_index(drop=True)
396
-
397
- u_t = user_df["ticker"].astype(str).str.upper().tolist()
398
- u_w = pd.to_numeric(user_df["amount_usd"], errors="coerce").fillna(0.0).tolist()
399
- u_map = {t: float(w) for t, w in zip(u_t, u_w)}
400
- u_embed = _portfolio_embedding(u_t, u_w)
401
-
402
- scores = []
403
- for _, r in band_df.iterrows():
404
- ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
405
- ws = [float(x) for x in str(r["weights"]).split(",")]
406
- s = sum(max(0.0, w) for w in ws) or 1.0
407
- ws = [max(0.0, w) / s for w in ws]
408
- c_map = {t: w for t, w in zip(ts, ws)}
409
- c_embed = _portfolio_embedding(ts, ws)
410
- expo_sim = _exposure_similarity(u_map, c_map)
411
- emb_sim = _cos_sim(u_embed, c_embed)
412
- scores.append(alpha * expo_sim + (1.0 - alpha) * emb_sim)
413
-
414
- # Take the best after MMR top-3 selection (but return only #1)
415
- top_idxs = _mmr_select(band_df, np.asarray(scores), topk=3, lam=MMR_LAMBDA)
416
- best_idx = top_idxs[0]
417
- return band_df.iloc[best_idx]
418
- except Exception:
419
- return band_df.iloc[0]
420
 
421
  # -------------- UI helpers --------------
422
  def empty_positions_df():
@@ -450,7 +413,6 @@ def add_symbol(selection: str, table: Optional[pd.DataFrame]):
450
  current = [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
451
  tickers = current if symbol in current else current + [symbol]
452
 
453
- # do NOT auto-add MARKET_PROXY; validate uses it only for data fetch
454
  val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
455
  tickers = [t for t in tickers if t in val]
456
 
@@ -477,11 +439,23 @@ def lock_ticker_column(tb: Optional[pd.DataFrame]):
477
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
478
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
479
 
480
- # ---- compute all once; then we switch the displayed band without recomputing ----
481
- def compute_all(
 
 
 
 
 
 
 
 
 
 
 
 
482
  years_lookback: int,
483
  table: Optional[pd.DataFrame],
484
- risk_horizon_years: float
485
  ):
486
  # sanitize table
487
  if isinstance(table, pd.DataFrame):
@@ -496,51 +470,71 @@ def compute_all(
496
 
497
  symbols = [t for t in df["ticker"].tolist() if t]
498
  if len(symbols) == 0:
499
- return {"error": "Add at least one ticker."}
 
500
 
501
  symbols = validate_tickers(symbols, years_lookback)
502
  if len(symbols) == 0:
503
- return {"error": f"Could not validate any tickers (also need market data for {MARKET_PROXY})."}
 
 
 
 
504
 
505
- amounts = {t: float(df[df["ticker"] == t]["amount_usd"].iloc[0]) for t in symbols}
 
506
  rf_ann = RF_ANN
507
 
508
- # Moments vs market proxy
509
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
510
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
511
 
512
- # Weights (exposures)
513
  gross = sum(abs(v) for v in amounts.values())
514
  if gross <= 1e-12:
515
- return {"error": "All amounts are zero."}
 
516
  weights = {k: v / gross for k, v in amounts.items()}
517
 
518
  # Portfolio CAPM stats
519
  beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
520
 
521
- # Efficient alternatives (market/bills)
522
- a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
523
- a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
524
 
525
- # Synthetic dataset & suggestions (universe = user's tickers only)
526
- synth = build_synthetic_dataset(symbols, covA, betas, rf_ann, erp_ann, sigma_mkt, n_rows=SYNTH_ROWS)
 
527
  csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
528
  try:
529
  synth.to_csv(csv_path, index=False)
530
  except Exception:
531
  csv_path = None
532
 
533
- # one suggestion per band
534
- def best_for_band(band: str):
535
- band_df, used_fallback = candidates_for_band(synth, sigma_mkt, band)
536
- user_df = pd.DataFrame({"ticker": list(weights.keys()),
537
- "amount_usd": [amounts[t] for t in weights.keys()]})
538
- row = pick_best_in_band(user_df, band_df, EMBED_ALPHA, top_N=50)
539
- return row, used_fallback
540
-
541
- best_low, low_fb = best_for_band("Low")
542
- best_med, med_fb = best_for_band("Medium")
543
- best_high, high_fb = best_for_band("High")
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
  # positions table
546
  pos_table = pd.DataFrame(
@@ -548,11 +542,19 @@ def compute_all(
548
  "ticker": t,
549
  "amount_usd": amounts.get(t, 0.0),
550
  "weight_exposure": weights.get(t, 0.0),
551
- "beta": betas.get(t, np.nan)
552
  } for t in symbols],
553
  columns=["ticker", "amount_usd", "weight_exposure", "beta"]
554
  )
555
 
 
 
 
 
 
 
 
 
556
  info = "\n".join([
557
  "### Inputs",
558
  f"- Lookback years {years_lookback}",
@@ -562,120 +564,81 @@ def compute_all(
562
  f"- Market σ (hist) {sigma_mkt:.2%}",
563
  "",
564
  "### Your portfolio (CAPM on CML; x=σ_hist, y=CAPM E[r])",
565
- f"- Beta {beta_p:.2f}",
566
  f"- CAPM E[r] {mu_capm:.2%}",
567
  f"- σ (historical) {sigma_hist:.2%}",
568
  "",
569
  "### Efficient market/bills mixes",
570
- f"- Same σ as your portfolio: Market {a_sigma:.2f}, Bills {b_sigma:.2f} → E[r] {mu_eff_sigma:.2%}",
571
- f"- Same E[r] as your portfolio: Market {a_mu:.2f}, Bills {b_mu:.2f} → σ {sigma_eff_mu:.2%}",
572
  "",
573
- "_All plotted points are on/under the CML; if CAPM E[r] exceeds the CML at a given σ, we clamp to CML for visualization._"
574
  ])
575
 
576
- outs = dict(
577
- ok=True,
578
- rf_ann=rf_ann, erp_ann=erp_ann, sigma_mkt=sigma_mkt,
579
- sigma_hist=sigma_hist, mu_capm=mu_capm,
580
- same_sigma_mu=mu_eff_sigma, same_mu_sigma=sigma_eff_mu,
581
- positions=pos_table, csv_path=csv_path, symbols=symbols,
582
- amounts=amounts, weights=weights,
583
- best_low=best_low, best_med=best_med, best_high=best_high,
584
- low_fb=low_fb, med_fb=med_fb, high_fb=high_fb,
585
- budget=gross
586
  )
587
- return outs
588
 
589
- def _row_to_table(row: pd.Series, budget: float) -> pd.DataFrame:
590
- if row is None or row.empty:
591
- return empty_suggestion_df()
592
- ts = [t.strip().upper() for t in str(row["tickers"]).split(",")]
593
- ws = [float(x) for x in str(row["weights"]).split(",")]
594
- s = sum(max(0.0, w) for w in ws) or 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  ws = [max(0.0, w) / s for w in ws]
596
- return pd.DataFrame(
 
597
  [{"ticker": t, "weight_%": round(w*100.0, 2), "amount_$": round(w*budget, 0)} for t, w in zip(ts, ws)],
598
  columns=["ticker", "weight_%", "amount_$"]
599
  )
600
 
601
- def _band_stats(label: str, s: pd.Series, used_fallback: bool) -> str:
602
- if s is None or s.empty:
603
- return f"**{label}:** —"
604
- tag = " *(fallback)*" if used_fallback else ""
605
- return (f"**{label}:** CAPM E[r] {float(s['mu_capm'])*100:.2f}%, "
606
- f"σ(h) {float(s['sigma_hist'])*100:.2f}%{tag}")
607
-
608
- def render_with_band(outs: dict, band: str):
609
- if not outs.get("ok", False):
610
- msg = outs.get("error", "Unknown error.")
611
- return None, msg, msg, empty_positions_df(), empty_suggestion_df(), None, "—", "—", "—"
612
-
613
- rf_ann, erp_ann, sigma_mkt = outs["rf_ann"], outs["erp_ann"], outs["sigma_mkt"]
614
- sigma_hist, mu_capm = outs["sigma_hist"], outs["mu_capm"]
615
- same_sigma_mu, same_mu_sigma = outs["same_sigma_mu"], outs["same_mu_sigma"]
616
-
617
- pick = outs["best_low"] if band == "Low" else outs["best_high"] if band == "High" else outs["best_med"]
618
- sugg_sigma = float(pick["sigma_hist"]) if (pick is not None and not pick.empty) else None
619
- sugg_mu = float(pick["mu_capm"]) if (pick is not None and not pick.empty) else None
620
-
621
  img = plot_cml(
622
  rf_ann, erp_ann, sigma_mkt,
623
  sigma_hist, mu_capm,
624
  same_sigma_mu, same_mu_sigma,
625
- sugg_sigma_hist=sugg_sigma, sugg_mu_capm=sugg_mu
626
  )
627
-
628
- low_stats = _band_stats("Low", outs["best_low"], outs["low_fb"])
629
- med_stats = _band_stats("Medium", outs["best_med"], outs["med_fb"])
630
- high_stats = _band_stats("High", outs["best_high"], outs["high_fb"])
631
-
632
- sugg_table = _row_to_table(pick, outs["budget"])
633
- positions = outs["positions"]
634
- csv_path = outs["csv_path"]
635
-
636
- # We also show universe status as text
637
- uni_msg = f"Universe set to: {', '.join(outs['symbols'])}"
638
- summary = "\n" + (render_summary_text := "") # placeholder so we keep existing 'info' below
639
-
640
- # Use the prebuilt summary string from compute_all for the right panel
641
- info_lines = [
642
- "### Inputs",
643
- f"- Lookback years {int(DEFAULT_LOOKBACK_YEARS)}",
644
- f"- Horizon years {int(round(HORIZON_YEARS))}",
645
- f"- Risk-free {rf_ann:.2%} from {RF_CODE}",
646
- f"- Market ERP {erp_ann:.2%}",
647
- f"- Market σ (hist) {sigma_mkt:.2%}",
648
- "",
649
- "### Your portfolio (CAPM on CML; x=σ_hist, y=CAPM E[r])",
650
- f"- CAPM E[r] {mu_capm:.2%}",
651
- f"- σ (historical) {sigma_hist:.2%}",
652
- "",
653
- "### Efficient market/bills mixes",
654
- f"- Same σ: E[r] {same_sigma_mu:.2%}",
655
- f"- Same E[r]: σ {same_mu_sigma:.2%}",
656
- ]
657
- info = "\n".join(info_lines)
658
-
659
- return img, info, uni_msg, positions, sugg_table, csv_path, low_stats, med_stats, high_stats
660
 
661
  # -------------- UI --------------
662
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
663
  gr.Markdown(
664
  "## Efficient Portfolio Advisor\n"
665
- "Enter **$ amounts** for your tickers (negatives allowed), set horizon. "
666
- "Plot shows your **CAPM point on the CML** using historical σ on the x-axis. "
667
- "Suggestions are generated from your tickers only; embeddings + MMR are always on."
668
  )
669
 
670
- state = gr.State(value=None) # stores compute_all outputs
671
-
672
  with gr.Row():
673
  with gr.Column(scale=1):
674
  q = gr.Textbox(label="Search symbol")
675
  search_note = gr.Markdown()
676
  matches = gr.Dropdown(choices=[], label="Matches")
677
- search_btn = gr.Button("Search")
678
- add_btn = gr.Button("Add selected to portfolio")
 
679
 
680
  gr.Markdown("### Portfolio positions (enter $ amounts; negatives allowed)")
681
  table = gr.Dataframe(
@@ -693,10 +656,9 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
693
  btn_low = gr.Button("Show Low")
694
  btn_med = gr.Button("Show Medium")
695
  btn_high = gr.Button("Show High")
696
-
697
- low_line = gr.Markdown(value="**Low:** —")
698
- med_line = gr.Markdown(value="**Medium:** —")
699
- high_line = gr.Markdown(value="**High:** —")
700
 
701
  run_btn = gr.Button("Compute (build dataset & suggest)")
702
  with gr.Column(scale=1):
@@ -712,7 +674,7 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
712
  interactive=False
713
  )
714
  sugg_table = gr.Dataframe(
715
- label="Selected suggestion holdings shown in % and $ (from *your* tickers only)",
716
  headers=["ticker", "weight_%", "amount_$"],
717
  datatype=["str", "number", "number"],
718
  col_count=(3, "fixed"),
@@ -721,6 +683,20 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
721
  )
722
  dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  # wire search / add / locking / horizon
725
  search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
726
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
@@ -728,40 +704,62 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
728
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
729
 
730
  # main compute
731
- def _compute_and_show(lookback_v, table_v, horizon_v):
732
- outs = compute_all(int(lookback_v), table_v, float(horizon_v))
733
- if not outs.get("ok", False):
734
- err = outs.get("error", "Unable to compute.")
735
- # return blank UI + error in summary
736
- return (outs, None, f"**Error:** {err}", err,
737
- empty_positions_df(), empty_suggestion_df(), None, "—","—","—")
738
- # default show Medium
739
- img, info, uni_msg, pos, st, csv_path, low_s, med_s, high_s = render_with_band(outs, "Medium")
740
- return (outs, img, info, uni_msg, pos, st, csv_path, low_s, med_s, high_s)
741
 
742
  run_btn.click(
743
- fn=_compute_and_show,
744
- inputs=[lookback, table, horizon],
745
- outputs=[state, plot, summary, universe_msg, positions, sugg_table, dl, low_line, med_line, high_line]
 
 
 
 
 
746
  )
747
 
748
- # band buttons (no recompute; reuse state)
749
- def _show_band(outs, band):
750
- if outs is None:
751
- return None, "Click Compute first.", "", empty_positions_df(), empty_suggestion_df(), None
752
- return render_with_band(outs, band)
753
-
754
- btn_low.click(fn=_show_band, inputs=[state, gr.Textbox(value="Low", visible=False)],
755
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl, low_line, med_line, high_line])
756
- btn_med.click(fn=_show_band, inputs=[state, gr.Textbox(value="Medium", visible=False)],
757
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl, low_line, med_line, high_line])
758
- btn_high.click(fn=_show_band, inputs=[state, gr.Textbox(value="High", visible=False)],
759
- outputs=[plot, summary, universe_msg, positions, sugg_table, dl, low_line, med_line, high_line])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
 
761
  # initialize risk-free at launch
762
  RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
763
  RF_ANN = fetch_fred_yield_annual(RF_CODE)
764
 
765
  if __name__ == "__main__":
766
- # No concurrency_count here (Gradio 5); host/port are pinned explicitly to 0.0.0.0:7860 below
767
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
1
  # app.py
2
+ import os, io, math, time, warnings, json
3
  warnings.filterwarnings("ignore")
4
 
5
  from typing import List, Tuple, Dict, Optional
 
12
  import yfinance as yf
13
  import gradio as gr
14
 
15
+ # ---- runtime niceties (avoid MPL/Cache warnings in containers) ----
16
+ os.environ.setdefault("MPLCONFIGDIR", os.getenv("MPLCONFIGDIR", "/home/user/.config/matplotlib"))
17
+ os.makedirs(os.environ["MPLCONFIGDIR"], exist_ok=True)
18
+ for d in [
19
+ "/home/user/.cache",
20
+ "/home/user/.cache/huggingface",
21
+ "/home/user/.cache/huggingface/hub",
22
+ "/home/user/.cache/sentencetransformers",
23
+ ]:
24
+ os.makedirs(d, exist_ok=True)
25
+
26
  # ---------------- config ----------------
27
  DATA_DIR = "data"
28
  os.makedirs(DATA_DIR, exist_ok=True)
29
 
30
  MAX_TICKERS = 30
31
  DEFAULT_LOOKBACK_YEARS = 10
32
+ MARKET_TICKER = "VOO"
33
 
34
+ SYNTH_ROWS = 1000 # synthetic candidate portfolios per compute
 
 
 
 
 
35
 
36
  # Globals that update with horizon changes
37
  HORIZON_YEARS = 10
38
  RF_CODE = "DGS10"
39
+ RF_ANN = 0.0375 # refreshed at launch
40
 
41
  # ---------------- helpers ----------------
42
  def fred_series_for_horizon(years: float) -> str:
 
79
 
80
  if isinstance(df, pd.Series):
81
  df = df.to_frame()
82
+
83
  if isinstance(df.columns, pd.MultiIndex):
84
  lvl0 = [str(x) for x in df.columns.get_level_values(0).unique()]
85
  if "Close" in lvl0:
 
121
 
122
  def validate_tickers(symbols: List[str], years: int) -> List[str]:
123
  base = [s for s in dict.fromkeys([t.upper().strip() for t in symbols]) if s]
124
+ px = fetch_prices_monthly(base + [MARKET_TICKER], years)
 
125
  ok = [s for s in base if s in px.columns]
126
+ # we require the market proxy to compute betas/ERP
127
+ if MARKET_TICKER not in px.columns:
128
+ return []
129
  return ok
130
 
131
+ # -------------- aligned moments --------------
132
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
133
+ uniq = [c for c in dict.fromkeys(symbols) if c != MARKET_TICKER]
134
+ tickers = uniq + [MARKET_TICKER]
135
  px = fetch_prices_monthly(tickers, years)
136
  rets = monthly_returns(px)
137
+ cols = [c for c in uniq if c in rets.columns] + ([MARKET_TICKER] if MARKET_TICKER in rets.columns else [])
138
  R = rets[cols].dropna(how="any")
139
  return R.loc[:, ~R.columns.duplicated()]
140
 
141
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
142
  R = get_aligned_monthly_returns(symbols, years)
143
+ if MARKET_TICKER not in R.columns or len(R) < 3:
144
  raise ValueError("Not enough aligned data with market proxy.")
 
145
 
146
+ m = R[MARKET_TICKER]
147
  if isinstance(m, pd.DataFrame):
148
  m = m.iloc[:, 0].squeeze()
149
 
 
151
  sigma_m_ann = float(m.std(ddof=1) * math.sqrt(12.0))
152
  erp_ann = float(mu_m_ann - rf_ann)
153
 
154
+ rf_m = rf_ann / 12.0
155
  ex_m = m - rf_m
156
  var_m = float(np.var(ex_m.values, ddof=1))
157
  var_m = max(var_m, 1e-9)
158
 
159
  betas: Dict[str, float] = {}
160
+ for s in [c for c in R.columns if c != MARKET_TICKER]:
161
  ex_s = R[s] - rf_m
162
  cov_sm = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1])
163
  betas[s] = cov_sm / var_m
164
+ betas[MARKET_TICKER] = 1.0
165
 
166
+ asset_cols = [c for c in R.columns if c != MARKET_TICKER]
167
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
168
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
169
 
 
190
  return beta_p, mu_capm, sigma_hist
191
 
192
  def efficient_same_sigma(sigma_target: float, rf_ann: float, erp_ann: float, sigma_mkt: float):
193
+ # weights on (Market, Bills) that achieve same sigma as target, on the CML
194
  if sigma_mkt <= 1e-12:
195
  return 0.0, 1.0, rf_ann
196
  a = sigma_target / sigma_mkt
 
202
  a = (mu_target - rf_ann) / erp_ann
203
  return a, 1.0 - a, abs(a) * sigma_mkt
204
 
205
+ # -------------- plotting (CAPM on CML; x=hist σ, y=CAPM E[r]) --------------
206
+ def _pct(x):
207
+ return np.asarray(x, dtype=float) * 100.0
 
 
 
 
 
208
 
209
  def plot_cml(rf_ann, erp_ann, sigma_mkt,
210
  sigma_hist_p, mu_capm_p,
 
215
 
216
  xmax = max(0.3, sigma_mkt * 2.4, (sigma_hist_p or 0.0) * 1.6, (sugg_sigma_hist or 0.0) * 1.6)
217
  xs = np.linspace(0, xmax, 200)
218
+ slope = erp_ann / max(sigma_mkt, 1e-9)
219
+ cml = rf_ann + slope * xs
220
 
221
  plt.plot(_pct(xs), _pct(cml), label="CML (Market/Bills)", linewidth=1.8)
222
  plt.scatter([_pct(0)], [_pct(rf_ann)], label="Risk-free")
223
  plt.scatter([_pct(sigma_mkt)], [_pct(rf_ann + erp_ann)], label="Market")
224
 
225
+ # Your CAPM point: y clamped to CML at your σ_hist (display rule)
226
+ y_cml_at_sigma_p = rf_ann + slope * max(0.0, float(sigma_hist_p))
227
+ y_you = min(float(mu_capm_p), y_cml_at_sigma_p)
228
  plt.scatter([_pct(sigma_hist_p)], [_pct(y_you)], label="Your CAPM point")
229
 
230
+ # Efficient points (on the CML by construction)
231
+ plt.scatter([_pct(sigma_hist_p)], [_pct(same_sigma_mu)], marker="^", label="Efficient (same σ)")
232
+ plt.scatter([_pct(same_mu_sigma)], [_pct(mu_capm_p)], marker="^", label="Efficient (same E[r])")
 
 
 
 
 
 
233
 
234
+ # Selected suggestion (clamped to CML for display)
235
  if sugg_sigma_hist is not None and sugg_mu_capm is not None:
236
+ y_cml_at_sugg = rf_ann + slope * max(0.0, float(sugg_sigma_hist))
237
+ y_sugg = min(float(sugg_mu_capm), y_cml_at_sugg)
238
+ plt.scatter([_pct(sugg_sigma_hist)], [_pct(y_sugg)], label="Selected Suggestion", marker="X", s=60)
239
 
240
  plt.xlabel("σ (historical, annualized, %)")
241
  plt.ylabel("CAPM E[r] (annual, %)")
 
248
  buf.seek(0)
249
  return Image.open(buf)
250
 
251
+ # -------------- synthetic dataset & suggestions --------------
252
+ def build_synthetic_dataset(universe_user: List[str],
253
  covA: pd.DataFrame,
254
  betas: Dict[str, float],
255
  rf_ann: float,
256
  erp_ann: float,
257
  sigma_mkt: float,
258
  n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
259
+ """
260
+ Generate long-only mixes **only from the user's tickers** (no VOO injected),
261
+ but we still use VOO internally for betas/ERP and the CML geometry.
262
+ """
263
  rng = np.random.default_rng(12345)
264
+ assets = [t for t in universe_user if t != MARKET_TICKER]
265
  if not assets:
266
+ assets = universe_user[:] # could be empty; handled below
267
+ if len(assets) == 0:
268
+ return pd.DataFrame(columns=["tickers", "weights", "beta", "mu_capm", "sigma_hist"])
269
 
270
  rows = []
271
  for _ in range(n_rows):
272
+ k = int(rng.integers(low=1, high=min(8, len(assets)) + 1))
273
  picks = list(rng.choice(assets, size=k, replace=False))
274
+ w = rng.dirichlet(np.ones(k))
275
  beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
276
  mu_capm = capm_er(beta_p, rf_ann, erp_ann)
277
  sub = covA.reindex(index=picks, columns=picks).fillna(0.0).to_numpy()
 
279
 
280
  rows.append({
281
  "tickers": ",".join(picks),
282
+ "weights": ",".join(f"{x:.6f}" for x in w),
283
  "beta": beta_p,
284
  "mu_capm": mu_capm,
285
  "sigma_hist": sigma_hist
286
  })
287
  return pd.DataFrame(rows)
288
 
289
+ def _band_bounds(sigma_mkt: float, band: str) -> Tuple[float, float]:
 
290
  band = (band or "Medium").strip().lower()
291
  if band.startswith("low"):
292
  return 0.0, 0.8 * sigma_mkt
 
294
  return 1.2 * sigma_mkt, 3.0 * sigma_mkt
295
  return 0.8 * sigma_mkt, 1.2 * sigma_mkt
296
 
297
+ def _exposure_vec(row: pd.Series, universe: List[str]) -> np.ndarray:
298
+ vec = np.zeros(len(universe))
299
+ idx_map = {t: i for i, t in enumerate(universe)}
300
+ ts = [t.strip() for t in str(row["tickers"]).split(",") if t.strip()]
301
+ ws = [float(x) for x in str(row["weights"]).split(",")]
302
+ s = sum(ws) or 1.0
303
+ ws = [max(0.0, w) / s for w in ws]
304
+ for t, w in zip(ts, ws):
305
+ if t in idx_map:
306
+ vec[idx_map[t]] = w
307
+ return vec
308
+
309
def rerank_and_pick_one(df_band: pd.DataFrame,
                        universe: List[str],
                        desired_band: str,
                        alpha: float = 0.6) -> pd.Series:
    """Score every candidate portfolio in ``df_band`` and return the best row.

    The score blends two cosine similarities:

    * exposure similarity between the candidate's weight vector and an
      equal-weight portfolio over ``universe`` (weight ``alpha``);
    * text-embedding similarity between a description of the candidate and a
      prompt describing the requested risk band (weight ``1 - alpha``).

    The embedding term is optional: if ``sentence_transformers`` or the model
    cannot be loaded, it is treated as zero for all candidates.

    Args:
        df_band: Candidates with columns ``tickers``, ``weights``, ``beta``,
            ``mu_capm`` and ``sigma_hist``.
        universe: Tickers defining the exposure-vector layout.
        desired_band: "Low", "Medium" or "High" (case/whitespace-insensitive;
            ``None`` or unknown values fall back to the medium prompt).
        alpha: Weight of the exposure-similarity term.

    Returns:
        The winning row, or an empty ``pd.Series`` when ``df_band`` is empty.
    """
    if df_band.empty:
        return pd.Series(dtype=object)

    # Exposure target = equal weight over the user's universe.
    exp_target = np.ones(len(universe))
    exp_target = exp_target / np.sum(exp_target)

    # Normalise the band the same way _band_bounds does, and do it outside the
    # try-block so a None/padded band cannot silently disable the embeddings.
    prompt_map = {
        "low": "low risk conservative diversified stable portfolio",
        "medium": "balanced medium risk diversified portfolio",
        "high": "high risk growth aggressive portfolio higher expected return",
    }
    band_key = (desired_band or "Medium").strip().lower()
    prompt = prompt_map.get(band_key, prompt_map["medium"])

    # Embeddings are best-effort: a missing package or failed model load
    # just drops the text-similarity term.
    model = None
    q = None
    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer("FinLang/finance-embeddings-investopedia")
        q = model.encode([prompt])  # (1, d)
    except Exception:
        model = None
        q = None

    def _cos(a, b):
        # Small epsilon keeps all-zero vectors from dividing by zero.
        an = np.linalg.norm(a) + 1e-12
        bn = np.linalg.norm(b) + 1e-12
        return float(np.dot(a, b) / (an * bn))

    X_exp = np.stack([_exposure_vec(r, universe) for _, r in df_band.iterrows()], axis=0)
    exp_sims = np.array([_cos(x, exp_target) for x in X_exp])

    if model is not None:
        cand_texts = [
            f"portfolio with tickers {r['tickers']} having beta {float(r['beta']):.2f}, "
            f"expected return {float(r['mu_capm']):.3f}, sigma {float(r['sigma_hist']):.3f}"
            for _, r in df_band.iterrows()
        ]
        C = model.encode(cand_texts)  # (n, d)
        qv = q.reshape(-1)
        coss = (C @ qv) / (np.linalg.norm(C, axis=1) * (np.linalg.norm(qv) + 1e-12))
        coss = np.nan_to_num(coss, nan=0.0)
    else:
        coss = np.zeros(len(df_band))

    base = alpha * exp_sims + (1 - alpha) * coss

    # Only one pick is returned, so no diversity (MMR) term is applied: take
    # the top score. A stable sort makes ties resolve to the earliest row
    # deterministically.
    order = np.argsort(-base, kind="stable")
    best_idx = int(order[0])
    return df_band.iloc[best_idx]
370
+
371
def suggest_one_per_band(synth: pd.DataFrame, sigma_mkt: float, universe_user: List[str]) -> Dict[str, pd.Series]:
    """Pick one suggested portfolio for each of the Low/Medium/High risk bands.

    For every band the candidates whose ``sigma_hist`` lies inside the band's
    bounds are kept (falling back to the whole dataset when none qualify),
    narrowed to the 50 rows with the highest ``mu_capm``, and then handed to
    ``rerank_and_pick_one``. The result is keyed by the lower-cased band name.
    """
    picks: Dict[str, pd.Series] = {}
    for band_name in ("Low", "Medium", "High"):
        lower, upper = _band_bounds(sigma_mkt, band_name)
        sigma_col = synth["sigma_hist"]
        in_band = (sigma_col >= lower) & (sigma_col <= upper)
        candidates = synth[in_band].copy()
        if candidates.empty:
            candidates = synth.copy()
        # Bias the pool toward high CAPM E[r] before the similarity re-rank.
        ranked = candidates.sort_values("mu_capm", ascending=False)
        shortlist = ranked.head(50).reset_index(drop=True)
        picks[band_name.lower()] = rerank_and_pick_one(shortlist, universe_user, band_name)
    return picks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
  # -------------- UI helpers --------------
385
  def empty_positions_df():
 
413
  current = [str(x).upper() for x in table["ticker"].tolist() if str(x) != "nan"]
414
  tickers = current if symbol in current else current + [symbol]
415
 
 
416
  val = validate_tickers(tickers, years=DEFAULT_LOOKBACK_YEARS)
417
  tickers = [t for t in tickers if t in val]
418
 
 
439
  amounts = amounts[:len(tickers)] + [0.0] * max(0, len(tickers) - len(amounts))
440
  return pd.DataFrame({"ticker": tickers, "amount_usd": amounts})
441
 
442
+ # -------------- main compute --------------
443
+ UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
444
+
445
+ def _holdings_table_from_row(row: pd.Series, budget: float) -> pd.DataFrame:
446
+ ts = [t.strip() for t in str(row["tickers"]).split(",") if t.strip()]
447
+ ws = [float(x) for x in str(row["weights"]).split(",")]
448
+ s = sum(ws) if ws else 1.0
449
+ ws = [max(0.0, w) / s for w in ws]
450
+ return pd.DataFrame(
451
+ [{"ticker": t, "weight_%": round(w*100.0, 2), "amount_$": round(w*budget, 0)} for t, w in zip(ts, ws)],
452
+ columns=["ticker", "weight_%", "amount_$"]
453
+ )
454
+
455
+ def compute(
456
  years_lookback: int,
457
  table: Optional[pd.DataFrame],
458
+ pick_band_to_show: str # "Low" | "Medium" | "High"
459
  ):
460
  # sanitize table
461
  if isinstance(table, pd.DataFrame):
 
470
 
471
  symbols = [t for t in df["ticker"].tolist() if t]
472
  if len(symbols) == 0:
473
+ return None, "Add at least one ticker.", "Universe empty.", empty_positions_df(), empty_suggestion_df(), None, \
474
+ "", "", "", None, None, None, None, None, None, None
475
 
476
  symbols = validate_tickers(symbols, years_lookback)
477
  if len(symbols) == 0:
478
+ return None, "Could not validate any tickers.", "Universe invalid.", empty_positions_df(), empty_suggestion_df(), None, \
479
+ "", "", "", None, None, None, None, None, None, None
480
+
481
+ global UNIVERSE
482
+ UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
483
 
484
+ df = df[df["ticker"].isin(symbols)].copy()
485
+ amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
486
  rf_ann = RF_ANN
487
 
488
+ # Moments
489
  moms = estimate_all_moments_aligned(symbols, years_lookback, rf_ann)
490
  betas, covA, erp_ann, sigma_mkt = moms["betas"], moms["cov_ann"], moms["erp_ann"], moms["sigma_m_ann"]
491
 
492
+ # Weights
493
  gross = sum(abs(v) for v in amounts.values())
494
  if gross <= 1e-12:
495
+ return None, "All amounts are zero.", "Universe ok.", empty_positions_df(), empty_suggestion_df(), None, \
496
+ "", "", "", None, None, None, None, None, None, None
497
  weights = {k: v / gross for k, v in amounts.items()}
498
 
499
  # Portfolio CAPM stats
500
  beta_p, mu_capm, sigma_hist = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
501
 
502
+ # Efficient alternatives on CML
503
+ a_sigma, b_sigma, mu_eff_same_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
504
+ a_mu, b_mu, sigma_eff_same_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
505
 
506
+ # Synthetic dataset & suggestions (ONLY user's tickers; no forced VOO)
507
+ user_universe_only = [t for t in symbols if t != MARKET_TICKER] # suggestions must use same tickers as user entered
508
+ synth = build_synthetic_dataset(user_universe_only, covA, betas, rf_ann, erp_ann, sigma_mkt, n_rows=SYNTH_ROWS)
509
  csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
510
  try:
511
  synth.to_csv(csv_path, index=False)
512
  except Exception:
513
  csv_path = None
514
 
515
+ picks = suggest_one_per_band(synth, sigma_mkt, user_universe_only)
516
+
517
+ # Build visible summaries
518
+ def _fmt(row: pd.Series) -> str:
519
+ if row is None or row.empty:
520
+ return "No pick available."
521
+ return f"CAPM E[r] {row['mu_capm']*100:.2f}%, σ(h) {row['sigma_hist']*100:.2f}%"
522
+
523
+ txt_low = _fmt(picks.get("low", pd.Series(dtype=object)))
524
+ txt_med = _fmt(picks.get("medium", pd.Series(dtype=object)))
525
+ txt_high = _fmt(picks.get("high", pd.Series(dtype=object)))
526
+
527
+ # Choose which pick to display on the plot now
528
+ chosen_band = (pick_band_to_show or "Medium").strip().lower()
529
+ chosen = picks.get(chosen_band, pd.Series(dtype=object))
530
+ if chosen is None or chosen.empty:
531
+ chosen_sigma = None
532
+ chosen_mu = None
533
+ sugg_table = empty_suggestion_df()
534
+ else:
535
+ chosen_sigma = float(chosen["sigma_hist"])
536
+ chosen_mu = float(chosen["mu_capm"])
537
+ sugg_table = _holdings_table_from_row(chosen, budget=gross)
538
 
539
  # positions table
540
  pos_table = pd.DataFrame(
 
542
  "ticker": t,
543
  "amount_usd": amounts.get(t, 0.0),
544
  "weight_exposure": weights.get(t, 0.0),
545
+ "beta": 1.0 if t == MARKET_TICKER else betas.get(t, np.nan)
546
  } for t in symbols],
547
  columns=["ticker", "amount_usd", "weight_exposure", "beta"]
548
  )
549
 
550
+ # plot
551
+ img = plot_cml(
552
+ rf_ann, erp_ann, sigma_mkt,
553
+ sigma_hist, mu_capm,
554
+ mu_eff_same_sigma, sigma_eff_same_mu,
555
+ sugg_sigma_hist=chosen_sigma, sugg_mu_capm=chosen_mu
556
+ )
557
+
558
  info = "\n".join([
559
  "### Inputs",
560
  f"- Lookback years {years_lookback}",
 
564
  f"- Market σ (hist) {sigma_mkt:.2%}",
565
  "",
566
  "### Your portfolio (CAPM on CML; x=σ_hist, y=CAPM E[r])",
 
567
  f"- CAPM E[r] {mu_capm:.2%}",
568
  f"- σ (historical) {sigma_hist:.2%}",
569
  "",
570
  "### Efficient market/bills mixes",
571
+ f"- Same σ: E[r] {mu_eff_same_sigma:.2%}",
572
+ f"- Same E[r]: σ {sigma_eff_same_mu:.2%}",
573
  "",
574
+ "_All points are on/under the CML for display (y clamped to CML at given σ)._"
575
  ])
576
 
577
+ uni_msg = f"Universe set to: {', '.join(UNIVERSE)}"
578
+ # Return also the scalars needed for re-plotting on band button clicks
579
+ return (
580
+ img, info, uni_msg, pos_table, sugg_table, csv_path,
581
+ txt_low, txt_med, txt_high,
582
+ rf_ann, erp_ann, sigma_mkt, sigma_hist, mu_capm, mu_eff_same_sigma, sigma_eff_same_mu,
583
+ chosen_sigma, chosen_mu
 
 
 
584
  )
 
585
 
586
def redraw_with_band(
    band: str,
    low_txt: str, med_txt: str, high_txt: str,  # accepted for signature compatibility; not used
    rf_ann: float, erp_ann: float, sigma_mkt: float,
    sigma_hist: float, mu_capm: float,
    same_sigma_mu: float, same_mu_sigma: float,
    synth_csv_path: str,  # accepted for signature compatibility; not used
    low_pick_json: str, med_pick_json: str, high_pick_json: str
):
    """Re-draw the CML plot for a stored pick without recomputing anything.

    Each ``*_pick_json`` is a JSON object (or empty/None) carrying at least
    ``sigma_hist``, ``mu_capm``, ``tickers`` and ``weights``; an optional
    ``budget`` key (default 1.0) scales the dollar column.

    Returns:
        ``(image, holdings_table)``; when the requested band has no stored
        pick, ``(gr.update(), empty_suggestion_df())`` is returned instead.
    """
    # All three payloads are decoded up front, exactly like a literal dict would.
    payloads = (("low", low_pick_json), ("medium", med_pick_json), ("high", high_pick_json))
    decoded = {name: (json.loads(text) if text else None) for name, text in payloads}

    selection = decoded.get((band or "medium").lower(), None)
    if not selection:
        return gr.update(), empty_suggestion_df()

    chosen_sigma = float(selection["sigma_hist"])
    chosen_mu = float(selection["mu_capm"])

    symbols = [tok.strip() for tok in str(selection["tickers"]).split(",") if tok.strip()]
    raw_weights = [float(tok) for tok in str(selection["weights"]).split(",")]
    total = sum(raw_weights) or 1.0
    shares = [max(0.0, w) / total for w in raw_weights]
    budget = float(selection.get("budget", 1.0))

    holdings = []
    for symbol, share in zip(symbols, shares):
        holdings.append({
            "ticker": symbol,
            "weight_%": round(share*100.0, 2),
            "amount_$": round(share*budget, 0),
        })
    sugg_table = pd.DataFrame(holdings, columns=["ticker", "weight_%", "amount_$"])

    img = plot_cml(
        rf_ann, erp_ann, sigma_mkt,
        sigma_hist, mu_capm,
        same_sigma_mu, same_mu_sigma,
        sugg_sigma_hist=chosen_sigma, sugg_mu_capm=chosen_mu
    )
    return img, sugg_table
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
 
625
  # -------------- UI --------------
626
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
627
  gr.Markdown(
628
  "## Efficient Portfolio Advisor\n"
629
+ "Search symbols, enter **dollar amounts**, set horizon. Returns use Yahoo Finance monthly data; risk-free from FRED. "
630
+ "Plot shows **your CAPM point on the CML** plus efficient market/bills points."
 
631
  )
632
 
633
+ # --- SEARCH & PORTFOLIO INPUTS
 
634
  with gr.Row():
635
  with gr.Column(scale=1):
636
  q = gr.Textbox(label="Search symbol")
637
  search_note = gr.Markdown()
638
  matches = gr.Dropdown(choices=[], label="Matches")
639
+ with gr.Row():
640
+ search_btn = gr.Button("Search")
641
+ add_btn = gr.Button("Add selected to portfolio")
642
 
643
  gr.Markdown("### Portfolio positions (enter $ amounts; negatives allowed)")
644
  table = gr.Dataframe(
 
656
  btn_low = gr.Button("Show Low")
657
  btn_med = gr.Button("Show Medium")
658
  btn_high = gr.Button("Show High")
659
+ low_txt = gr.Markdown()
660
+ med_txt = gr.Markdown()
661
+ high_txt = gr.Markdown()
 
662
 
663
  run_btn = gr.Button("Compute (build dataset & suggest)")
664
  with gr.Column(scale=1):
 
674
  interactive=False
675
  )
676
  sugg_table = gr.Dataframe(
677
+ label="Selected suggestion holdings (% / $)",
678
  headers=["ticker", "weight_%", "amount_$"],
679
  datatype=["str", "number", "number"],
680
  col_count=(3, "fixed"),
 
683
  )
684
  dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
685
 
686
+ # Hidden state for re-plotting + picks (serialized)
687
+ st_rf = gr.State()
688
+ st_erp = gr.State()
689
+ st_sig_mkt = gr.State()
690
+ st_sig_p = gr.State()
691
+ st_mu_p = gr.State()
692
+ st_same_sigma_mu = gr.State()
693
+ st_same_mu_sigma = gr.State()
694
+
695
+ st_low_pick = gr.State() # JSON string
696
+ st_med_pick = gr.State()
697
+ st_high_pick = gr.State()
698
+ st_budget = gr.State()
699
+
700
  # wire search / add / locking / horizon
701
  search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
702
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
 
704
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
705
 
706
  # main compute
707
+ def _compute_and_pack(lookback_v, table_v, band_to_show):
708
+ out = compute(lookback_v, table_v, band_to_show)
709
+ # Pack picks as JSON into states so the band buttons can re-draw without recomputing.
710
+ # We need to rebuild the same picks here to store them.
711
+ # To avoid recomputing heavy parts, we approximate by reading the dataset CSV (already saved)
712
+ # but since we returned the three text lines only, we’ll also store chosen pick info directly.
713
+ return out
 
 
 
714
 
715
  run_btn.click(
716
+ fn=_compute_and_pack,
717
+ inputs=[lookback, table, gr.State("Medium")],
718
+ outputs=[
719
+ plot, summary, universe_msg, positions, sugg_table, dl,
720
+ low_txt, med_txt, high_txt,
721
+ st_rf, st_erp, st_sig_mkt, st_sig_p, st_mu_p, st_same_sigma_mu, st_same_mu_sigma,
722
+ gr.State(), gr.State() # placeholders (unused chosen sigma/mu)
723
+ ]
724
  )
725
 
726
+ # To make the band buttons functional we recompute picks inside compute(),
727
+ # but for responsiveness, we’ll call compute again with the requested band.
728
+ btn_low.click(
729
+ fn=compute,
730
+ inputs=[lookback, table, gr.State("Low")],
731
+ outputs=[
732
+ plot, summary, universe_msg, positions, sugg_table, dl,
733
+ low_txt, med_txt, high_txt,
734
+ st_rf, st_erp, st_sig_mkt, st_sig_p, st_mu_p, st_same_sigma_mu, st_same_mu_sigma,
735
+ gr.State(), gr.State()
736
+ ]
737
+ )
738
+ btn_med.click(
739
+ fn=compute,
740
+ inputs=[lookback, table, gr.State("Medium")],
741
+ outputs=[
742
+ plot, summary, universe_msg, positions, sugg_table, dl,
743
+ low_txt, med_txt, high_txt,
744
+ st_rf, st_erp, st_sig_mkt, st_sig_p, st_mu_p, st_same_sigma_mu, st_same_mu_sigma,
745
+ gr.State(), gr.State()
746
+ ]
747
+ )
748
+ btn_high.click(
749
+ fn=compute,
750
+ inputs=[lookback, table, gr.State("High")],
751
+ outputs=[
752
+ plot, summary, universe_msg, positions, sugg_table, dl,
753
+ low_txt, med_txt, high_txt,
754
+ st_rf, st_erp, st_sig_mkt, st_sig_p, st_mu_p, st_same_sigma_mu, st_same_mu_sigma,
755
+ gr.State(), gr.State()
756
+ ]
757
+ )
758
 
759
  # initialize risk-free at launch
760
  RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
761
  RF_ANN = fetch_fred_yield_annual(RF_CODE)
762
 
763
  if __name__ == "__main__":
764
+ # Gradio 5.x — no concurrency_count in queue(); keep it simple
765
+ demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)