Tulitula committed on
Commit
56a394e
·
verified ·
1 Parent(s): efa2e5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -117
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py (CML-safe: sigma uses full cov incl. market)
2
  import os, io, math, time, warnings
3
  warnings.filterwarnings("ignore")
4
 
@@ -22,8 +22,8 @@ MARKET_TICKER = "VOO"
22
 
23
  SYNTH_ROWS = 1000
24
  EMBED_MODEL_NAME = "FinLang/finance-embeddings-investopedia"
25
- EMBED_ALPHA = 0.6
26
- MMR_LAMBDA = 0.7
27
 
28
  HORIZON_YEARS = 10
29
  RF_CODE = "DGS10"
@@ -126,7 +126,7 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
126
  betas[s] = cov_sm / var_m
127
  betas[MARKET_TICKER] = 1.0
128
 
129
- # FULL covariance including MARKET_TICKER (crucial to keep points ≤ CML)
130
  cov_all_ann = pd.DataFrame(np.cov(R.values.T, ddof=1) * 12.0,
131
  index=R.columns, columns=R.columns)
132
 
@@ -186,19 +186,19 @@ def plot_cml_hybrid(rf_ann, erp_ann, sigma_mkt,
186
  buf = io.BytesIO(); plt.savefig(buf, format="png"); plt.close(fig); buf.seek(0)
187
  return Image.open(buf)
188
 
189
- # -------------- synthetic dataset (σ uses FULL cov) --------------
190
- def build_synthetic_dataset(universe: List[str],
191
- cov_all_ann: pd.DataFrame,
192
- betas: Dict[str, float],
193
- rf_ann: float,
194
- erp_ann: float,
195
- n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
 
196
  rng = np.random.default_rng(12345)
197
- if MARKET_TICKER not in universe: universe = list(universe) + [MARKET_TICKER]
 
198
  rows = []
199
  for _ in range(n_rows):
200
- k = int(rng.integers(low=2, high=min(8, len(universe)) + 1))
201
- picks = list(rng.choice(universe, size=k, replace=False))
202
  w = rng.dirichlet(np.ones(k))
203
  beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
204
  mu_capm = capm_er(beta_p, rf_ann, erp_ann)
@@ -219,16 +219,7 @@ def _band_bounds_sigma_hist(sigma_mkt: float, band: str) -> Tuple[float, float]:
219
  if band.startswith("high"): return 1.2 * sigma_mkt, 3.0 * sigma_mkt
220
  return 0.8 * sigma_mkt, 1.2 * sigma_mkt
221
 
222
- def _summarize_three(df: pd.DataFrame) -> pd.DataFrame:
223
- if df.empty: return pd.DataFrame(columns=["pick","CAPM E[r] %","σ (hist) %","tickers"])
224
- out = df.copy()
225
- out = out.assign(**{"CAPM E[r] %": (out["mu_capm"]*100).round(2),
226
- "σ (hist) %": (out["sigma_hist"]*100).round(2),
227
- "tickers": out["tickers"]})[["CAPM E[r] %","σ (hist) %","tickers"]]
228
- out = out.reset_index(drop=True); out.insert(0, "pick", [1,2,3][:len(out)])
229
- return out
230
-
231
- # -------------- embeddings & re-ranking --------------
232
  _EMBED_MODEL = None
233
  _TICKER_EMBED_CACHE: Dict[str, np.ndarray] = {}
234
 
@@ -272,48 +263,38 @@ def _exposure_similarity(user_map: Dict[str,float], cand_map: Dict[str,float]) -
272
  c = {k:abs(v)/s_c for k,v in cand_map.items()}
273
  common = set(u)&set(c); return float(sum(min(u[t],c[t]) for t in common))
274
 
275
- def rerank_band_with_embeddings(user_df: pd.DataFrame, band_df: pd.DataFrame,
276
- alpha: float = EMBED_ALPHA, mmr_lambda: float = MMR_LAMBDA, top_k: int = 3) -> pd.DataFrame:
 
 
 
 
 
277
  try:
 
 
 
278
  u_t = user_df["ticker"].astype(str).str.upper().tolist()
279
  u_w = pd.to_numeric(user_df["amount_usd"], errors="coerce").fillna(0.0).tolist()
280
  u_map = {t: float(w) for t, w in zip(u_t, u_w)}
281
  u_embed = _portfolio_embedding(u_t, u_w)
282
 
283
- cand_rows = []; cand_embeds = []
284
  for _, r in band_df.iterrows():
285
  ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
286
  ws = [float(x) for x in str(r["weights"]).split(",")]
287
  s = sum(max(0.0,w) for w in ws) or 1.0
288
  ws = [max(0.0,w)/s for w in ws]
289
  c_map = {t:w for t,w in zip(ts,ws)}
290
- c_embed = _portfolio_embedding(ts, ws); cand_embeds.append(c_embed)
291
  expo_sim = _exposure_similarity(u_map, c_map)
292
  emb_sim = _cos_sim(u_embed, c_embed)
293
- score = alpha*expo_sim + (1.0-alpha)*emb_sim
294
- cand_rows.append((score, r))
295
-
296
- if not cand_rows: return band_df.head(top_k).reset_index(drop=True)
297
-
298
- cand_embeds = np.stack(cand_embeds, axis=0)
299
- order = np.argsort([-s for s,_ in cand_rows])
300
- picked = []; picked_idx = []
301
- for i in order:
302
- if len(picked)>=top_k: break
303
- s_i, row_i = cand_rows[i]
304
- if not picked:
305
- picked.append(row_i); picked_idx.append(i); continue
306
- sim_to_picked = max(_cos_sim(cand_embeds[i], cand_embeds[j]) for j in picked_idx)
307
- mmr = mmr_lambda*s_i - (1.0-mmr_lambda)*sim_to_picked # noqa: F841 (kept for clarity)
308
- picked.append(row_i); picked_idx.append(i)
309
- out = pd.DataFrame([r for r in picked]).drop_duplicates().head(top_k).reset_index(drop=True)
310
- if out.empty: out = band_df.head(top_k).reset_index(drop=True)
311
- out.insert(0,"pick",[1,2,3][:len(out)])
312
- return out
313
  except Exception:
314
- out = band_df.sort_values("mu_capm", ascending=False).head(top_k).reset_index(drop=True)
315
- out.insert(0,"pick",[1,2,3][:len(out)])
316
- return out
317
 
318
  # -------------- UI helpers --------------
319
  def empty_positions_df(): return pd.DataFrame(columns=["ticker","amount_usd","weight_exposure","beta"])
@@ -364,7 +345,8 @@ def lock_ticker_column(tb: Optional[pd.DataFrame]):
364
  # -------------- compute core --------------
365
  UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
366
 
367
- def _pick_to_holdings(row: pd.Series, budget: float) -> pd.DataFrame:
 
368
  ts = [t.strip().upper() for t in str(row["tickers"]).split(",")]
369
  ws = [float(x) for x in str(row["weights"]).split(",")]
370
  s = sum(max(0.0,w) for w in ws) or 1.0
@@ -372,20 +354,23 @@ def _pick_to_holdings(row: pd.Series, budget: float) -> pd.DataFrame:
372
  return pd.DataFrame([{"ticker": t, "weight_%": round(w*100,2), "amount_$": round(w*budget,0)} for t,w in zip(ts,ws)],
373
  columns=["ticker","weight_%","amount_$"])
374
 
375
- def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddings: bool):
 
376
  df = table.copy() if isinstance(table,pd.DataFrame) else pd.DataFrame(columns=["ticker","amount_usd"])
377
  df = df.dropna(how="all")
378
  if "ticker" not in df.columns: df["ticker"] = []
379
  if "amount_usd" not in df.columns: df["amount_usd"] = []
380
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
381
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
 
382
  symbols = [t for t in df["ticker"].tolist() if t]
383
  if len(symbols)==0: raise gr.Error("Add at least one ticker.")
384
  symbols = validate_tickers(symbols, years_lookback)
385
  if len(symbols)==0: raise gr.Error("Could not validate any tickers.")
386
 
387
  global UNIVERSE
388
- UNIVERSE = list(sorted(set([s for s in symbols] + [MARKET_TICKER])))[:MAX_TICKERS]
 
389
 
390
  df = df[df["ticker"].isin(symbols)].copy()
391
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
@@ -403,30 +388,29 @@ def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddin
403
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
404
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
405
 
406
- synth = build_synthetic_dataset(UNIVERSE, cov_all_ann, betas, rf_ann, erp_ann, n_rows=SYNTH_ROWS)
 
407
  csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
408
  try: synth.to_csv(csv_path, index=False)
409
  except Exception: csv_path = None
410
 
411
- def band_top3(band: str) -> pd.DataFrame:
 
412
  lo, hi = _band_bounds_sigma_hist(sigma_mkt, band)
413
- pick = synth[(synth["sigma_hist"]>=lo) & (synth["sigma_hist"]<=hi)].copy()
414
- if pick.empty: pick = synth.copy()
415
- pick = pick.sort_values("mu_capm", ascending=False).head(50).reset_index(drop=True)
416
- if use_embeddings:
417
- user_df = pd.DataFrame({"ticker": list(weights.keys()), "amount_usd": [amounts[t] for t in weights.keys()]})
418
- top3 = rerank_band_with_embeddings(user_df, pick, EMBED_ALPHA, MMR_LAMBDA, top_k=3)
419
- else:
420
- top3 = pick.head(3).reset_index(drop=True); top3.insert(0,"pick",[1,2,3][:len(top3)])
421
- return top3
422
-
423
- top3_low, top3_med, top3_high = band_top3("Low"), band_top3("Medium"), band_top3("High")
424
- low_sum, med_sum, high_sum = _summarize_three(top3_low), _summarize_three(top3_med), _summarize_three(top3_high)
425
 
 
426
  pos_table = pd.DataFrame([{
427
  "ticker": t, "amount_usd": amounts.get(t,0.0),
428
  "weight_exposure": weights.get(t,0.0),
429
- "beta": 1.0 if t==MARKET_TICKER else betas.get(t, np.nan)
430
  } for t in symbols], columns=["ticker","amount_usd","weight_exposure","beta"])
431
 
432
  info = "\n".join([
@@ -446,6 +430,7 @@ def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddin
446
  f"- Same σ as your portfolio: Market {a_sigma:.2f}, Bills {b_sigma:.2f} → E[r] {mu_eff_sigma:.2%}",
447
  f"- Same E[r] as your portfolio: Market {a_mu:.2f}, Bills {b_mu:.2f} → σ {sigma_eff_mu:.2%}",
448
  "",
 
449
  "_All points are guaranteed on/under the CML because σ uses the full covariance (incl. market)._"
450
  ])
451
 
@@ -454,23 +439,20 @@ def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddin
454
  mu_capm=mu_capm, sigma_hist=sigma_hist,
455
  mu_eff_same_sigma=mu_eff_sigma, sigma_eff_same_return=sigma_eff_mu,
456
  pos_table=pos_table, info=info, uni_msg=uni_msg, csv_path=csv_path,
457
- low_sum=low_sum, med_sum=med_sum, high_sum=high_sum,
458
- top3_low=top3_low, top3_med=top3_med, top3_high=top3_high,
459
- budget=sum(abs(v) for v in amounts.values()))
460
-
461
- def compute_and_render(years_lookback: int, table: Optional[pd.DataFrame], use_embeddings: bool,
462
- which_band: str, pick_idx: int):
463
- outs = compute_all(years_lookback, table, use_embeddings)
464
- band = (which_band or "Medium").strip().title()
465
- idx = max(1, min(3, int(pick_idx))) - 1
466
- top3 = outs["top3_med"] if band=="Medium" else (outs["top3_low"] if band=="Low" else outs["top3_high"])
467
-
468
- if top3.empty:
469
- sugg_mu = None; sugg_sigma_hist = None; holdings = empty_holdings_df()
470
- else:
471
- row = top3.iloc[min(idx, len(top3)-1)]
472
  sugg_mu = float(row["mu_capm"]); sugg_sigma_hist = float(row["sigma_hist"])
473
- holdings = _pick_to_holdings(row, outs["budget"])
474
 
475
  img = plot_cml_hybrid(
476
  outs["rf_ann"], outs["erp_ann"], outs["sigma_mkt"],
@@ -478,16 +460,26 @@ def compute_and_render(years_lookback: int, table: Optional[pd.DataFrame], use_e
478
  outs["mu_eff_same_sigma"], outs["sigma_eff_same_return"],
479
  sugg_mu, sugg_sigma_hist
480
  )
481
- return (img, outs["info"], outs["uni_msg"], outs["pos_table"],
482
- holdings, outs["csv_path"], outs["low_sum"], outs["med_sum"], outs["high_sum"])
 
 
 
 
 
 
 
 
 
483
 
484
  # -------------- UI --------------
485
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
486
  gr.Markdown(
487
  "## Efficient Portfolio Advisor\n"
488
  "Plot uses **x = historical σ** and **y = CAPM E[r] = rf + β·ERP**. "
489
- "Efficient (same σ) and (same E[r]) market/bills points are shown. "
490
- "Suggestions come from 1,000 mixes; embeddings + MMR add diversity."
 
491
  )
492
  with gr.Row():
493
  with gr.Column(scale=1):
@@ -499,18 +491,16 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
499
  table = gr.Dataframe(value=pd.DataFrame(columns=["ticker","amount_usd"]), interactive=True)
500
  horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
501
  lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years")
502
- use_emb = gr.Checkbox(value=True, label="Use finance embeddings + MMR for diverse picks")
503
- gr.Markdown("### Suggestions")
504
- with gr.Tabs():
505
- with gr.Tab("Low"):
506
- low_summary = gr.Dataframe(value=empty_holdings_df(), interactive=False, label="Top 3 (Low risk)")
507
- pick_low = gr.Radio(choices=["1","2","3"], value="1", label="Select a pick in Low")
508
- with gr.Tab("Medium"):
509
- med_summary = gr.Dataframe(value=empty_holdings_df(), interactive=False, label="Top 3 (Medium risk)")
510
- pick_med = gr.Radio(choices=["1","2","3"], value="1", label="Select a pick in Medium")
511
- with gr.Tab("High"):
512
- high_summary = gr.Dataframe(value=empty_holdings_df(), interactive=False, label="Top 3 (High risk)")
513
- pick_high = gr.Radio(choices=["1","2","3"], value="1", label="Select a pick in High")
514
  run_btn = gr.Button("Compute (build dataset & suggest)")
515
  with gr.Column(scale=1):
516
  plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
@@ -521,35 +511,40 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
521
  label="Selected suggestion holdings (% / $)")
522
  dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
523
 
 
524
  search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
525
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
526
  table.change(fn=lock_ticker_column, inputs=table, outputs=table)
527
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
528
 
 
529
  run_btn.click(
530
- fn=compute_and_render,
531
- inputs=[lookback, table, use_emb, gr.State("Medium"), gr.State(1)],
532
- outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_summary, med_summary, high_summary]
533
  )
534
- pick_low.change(
535
- fn=compute_and_render,
536
- inputs=[lookback, table, use_emb, gr.State("Low"), pick_low],
537
- outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_summary, med_summary, high_summary]
 
 
538
  )
539
- pick_med.change(
540
- fn=compute_and_render,
541
- inputs=[lookback, table, use_emb, gr.State("Medium"), pick_med],
542
- outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_summary, med_summary, high_summary]
543
  )
544
- pick_high.change(
545
- fn=compute_and_render,
546
- inputs=[lookback, table, use_emb, gr.State("High"), pick_high],
547
- outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_summary, med_summary, high_summary]
548
  )
549
 
 
550
  RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
551
  RF_ANN = fetch_fred_yield_annual(RF_CODE)
552
 
553
  if __name__ == "__main__":
554
- demo.queue()
555
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), show_api=False, share=False)
 
1
+ # app.py — Efficient Portfolio Advisor (CML-safe, same-ticker suggestions, embeddings always on)
2
  import os, io, math, time, warnings
3
  warnings.filterwarnings("ignore")
4
 
 
22
 
23
  SYNTH_ROWS = 1000
24
  EMBED_MODEL_NAME = "FinLang/finance-embeddings-investopedia"
25
+ EMBED_ALPHA = 0.6 # exposure similarity weight
26
+ MMR_LAMBDA = 0.7 # diversity for re-ranking (kept for consistency even though we output 1 per band)
27
 
28
  HORIZON_YEARS = 10
29
  RF_CODE = "DGS10"
 
126
  betas[s] = cov_sm / var_m
127
  betas[MARKET_TICKER] = 1.0
128
 
129
+ # Full covariance including MARKET_TICKER
130
  cov_all_ann = pd.DataFrame(np.cov(R.values.T, ddof=1) * 12.0,
131
  index=R.columns, columns=R.columns)
132
 
 
186
  buf = io.BytesIO(); plt.savefig(buf, format="png"); plt.close(fig); buf.seek(0)
187
  return Image.open(buf)
188
 
189
+ # -------------- synthetic dataset (re-weights of SAME tickers) --------------
190
+ def build_synthetic_dataset_same_tickers(tickers: List[str],
191
+ cov_all_ann: pd.DataFrame,
192
+ betas: Dict[str, float],
193
+ rf_ann: float,
194
+ erp_ann: float,
195
+ n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
196
+ """Generate long-only Dirichlet weights over EXACTLY the user's tickers."""
197
  rng = np.random.default_rng(12345)
198
+ picks = [t for t in tickers] # fixed set
199
+ k = len(picks)
200
  rows = []
201
  for _ in range(n_rows):
 
 
202
  w = rng.dirichlet(np.ones(k))
203
  beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
204
  mu_capm = capm_er(beta_p, rf_ann, erp_ann)
 
219
  if band.startswith("high"): return 1.2 * sigma_mkt, 3.0 * sigma_mkt
220
  return 0.8 * sigma_mkt, 1.2 * sigma_mkt
221
 
222
+ # -------------- embeddings & scoring --------------
 
 
 
 
 
 
 
 
 
223
  _EMBED_MODEL = None
224
  _TICKER_EMBED_CACHE: Dict[str, np.ndarray] = {}
225
 
 
263
  c = {k:abs(v)/s_c for k,v in cand_map.items()}
264
  common = set(u)&set(c); return float(sum(min(u[t],c[t]) for t in common))
265
 
266
def pick_best_in_band(user_df: pd.DataFrame, band_df: pd.DataFrame,
                      alpha: float = EMBED_ALPHA, top_N: int = 50) -> pd.Series:
    """
    Return the single candidate row of ``band_df`` that best matches the user's holdings.

    Candidates are first restricted to the ``top_N`` rows with the highest ``mu_capm``.
    Each remaining row is scored as
    ``alpha * exposure_similarity + (1 - alpha) * embedding_similarity`` against the
    user's portfolio, and the highest-scoring row is returned.

    Args:
        user_df: user holdings; must provide ``ticker`` and ``amount_usd`` columns.
        band_df: candidate portfolios; must provide ``mu_capm`` plus ``tickers`` and
            ``weights`` columns holding comma-separated strings.
        alpha: weight given to the exposure similarity (the embedding similarity
            gets ``1 - alpha``).
        top_N: number of highest-``mu_capm`` candidates to score; values below 1 are
            treated as 1 so there is always at least one candidate.

    Returns:
        The best-scoring row. An empty float64 Series when ``band_df`` is empty.
        If scoring fails for any reason, or no candidate has a finite score, the
        first candidate is returned (the highest-``mu_capm`` row when sorting
        succeeded, otherwise the first row of ``band_df`` as given).
    """
    if band_df.empty:
        return pd.Series(dtype="float64")

    # Captured before anything can fail so the except-path always has a valid row.
    fallback = band_df.iloc[0]
    try:
        # Restrict to strong candidates by return first.
        candidates = (band_df.sort_values("mu_capm", ascending=False)
                      .head(max(1, int(top_N)))
                      .reset_index(drop=True))
        fallback = candidates.iloc[0]

        u_t = user_df["ticker"].astype(str).str.upper().tolist()
        u_w = pd.to_numeric(user_df["amount_usd"], errors="coerce").fillna(0.0).tolist()
        u_map = {t: float(w) for t, w in zip(u_t, u_w)}
        u_embed = _portfolio_embedding(u_t, u_w)

        scores = []
        for _, r in candidates.iterrows():
            ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
            ws = [float(x) for x in str(r["weights"]).split(",")]
            # Clip negatives and renormalise; `or 1.0` avoids dividing by zero.
            s = sum(max(0.0, w) for w in ws) or 1.0
            ws = [max(0.0, w) / s for w in ws]
            c_map = {t: w for t, w in zip(ts, ws)}
            c_embed = _portfolio_embedding(ts, ws)
            expo_sim = _exposure_similarity(u_map, c_map)
            emb_sim = _cos_sim(u_embed, c_embed)
            scores.append(alpha * expo_sim + (1.0 - alpha) * emb_sim)

        # np.argmax would select a NaN entry if one is present, so mask
        # non-finite scores out before choosing the winner.
        score_arr = np.asarray(scores, dtype=float)
        finite = np.isfinite(score_arr)
        if not finite.any():
            return fallback
        i_best = int(np.argmax(np.where(finite, score_arr, -np.inf)))
        return candidates.iloc[i_best]
    except Exception:
        # Deliberate best-effort: any failure in embedding/scoring degrades to
        # the top candidate instead of breaking the UI callback.
        return fallback
 
 
298
 
299
  # -------------- UI helpers --------------
300
  def empty_positions_df(): return pd.DataFrame(columns=["ticker","amount_usd","weight_exposure","beta"])
 
345
  # -------------- compute core --------------
346
  UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
347
 
348
+ def _row_to_holdings(row: pd.Series, budget: float) -> pd.DataFrame:
349
+ if row is None or row.empty: return empty_holdings_df()
350
  ts = [t.strip().upper() for t in str(row["tickers"]).split(",")]
351
  ws = [float(x) for x in str(row["weights"]).split(",")]
352
  s = sum(max(0.0,w) for w in ws) or 1.0
 
354
  return pd.DataFrame([{"ticker": t, "weight_%": round(w*100,2), "amount_$": round(w*budget,0)} for t,w in zip(ts,ws)],
355
  columns=["ticker","weight_%","amount_$"])
356
 
357
+ def compute_all(years_lookback: int, table: Optional[pd.DataFrame]):
358
+ # sanitize input table
359
  df = table.copy() if isinstance(table,pd.DataFrame) else pd.DataFrame(columns=["ticker","amount_usd"])
360
  df = df.dropna(how="all")
361
  if "ticker" not in df.columns: df["ticker"] = []
362
  if "amount_usd" not in df.columns: df["amount_usd"] = []
363
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
364
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
365
+
366
  symbols = [t for t in df["ticker"].tolist() if t]
367
  if len(symbols)==0: raise gr.Error("Add at least one ticker.")
368
  symbols = validate_tickers(symbols, years_lookback)
369
  if len(symbols)==0: raise gr.Error("Could not validate any tickers.")
370
 
371
  global UNIVERSE
372
+ # Universe is your exact tickers (for suggestions we re-weight SAME tickers)
373
+ UNIVERSE = list(sorted(set(symbols)))[:MAX_TICKERS]
374
 
375
  df = df[df["ticker"].isin(symbols)].copy()
376
  amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
 
388
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
389
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
390
 
391
+ # dataset: re-weight EXACT same tickers you entered
392
+ synth = build_synthetic_dataset_same_tickers(UNIVERSE, cov_all_ann, betas, rf_ann, erp_ann, n_rows=SYNTH_ROWS)
393
  csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
394
  try: synth.to_csv(csv_path, index=False)
395
  except Exception: csv_path = None
396
 
397
+ # one suggestion per band (best by embedding/exposure score among candidates in band)
398
+ def best_for_band(band: str) -> pd.Series:
399
  lo, hi = _band_bounds_sigma_hist(sigma_mkt, band)
400
+ band_df = synth[(synth["sigma_hist"]>=lo) & (synth["sigma_hist"]<=hi)].copy()
401
+ if band_df.empty: band_df = synth.copy()
402
+ user_df = pd.DataFrame({"ticker": list(weights.keys()), "amount_usd": [amounts[t] for t in weights.keys()]})
403
+ return pick_best_in_band(user_df, band_df, EMBED_ALPHA, top_N=50)
404
+
405
+ best_low = best_for_band("Low")
406
+ best_med = best_for_band("Medium")
407
+ best_high = best_for_band("High")
 
 
 
 
408
 
409
+ # derived displays
410
  pos_table = pd.DataFrame([{
411
  "ticker": t, "amount_usd": amounts.get(t,0.0),
412
  "weight_exposure": weights.get(t,0.0),
413
+ "beta": betas.get(t, np.nan) if t != MARKET_TICKER else 1.0
414
  } for t in symbols], columns=["ticker","amount_usd","weight_exposure","beta"])
415
 
416
  info = "\n".join([
 
430
  f"- Same σ as your portfolio: Market {a_sigma:.2f}, Bills {b_sigma:.2f} → E[r] {mu_eff_sigma:.2%}",
431
  f"- Same E[r] as your portfolio: Market {a_mu:.2f}, Bills {b_mu:.2f} → σ {sigma_eff_mu:.2%}",
432
  "",
433
+ "_Suggestions are single picks per band, re-weighting **the same tickers you entered**, and are chosen via embeddings + exposure similarity._",
434
  "_All points are guaranteed on/under the CML because σ uses the full covariance (incl. market)._"
435
  ])
436
 
 
439
  mu_capm=mu_capm, sigma_hist=sigma_hist,
440
  mu_eff_same_sigma=mu_eff_sigma, sigma_eff_same_return=sigma_eff_mu,
441
  pos_table=pos_table, info=info, uni_msg=uni_msg, csv_path=csv_path,
442
+ best_low=best_low, best_med=best_med, best_high=best_high,
443
+ budget=gross)
444
+
445
+ def render_with_band(years_lookback: int, table: Optional[pd.DataFrame], which_band: str):
446
+ outs = compute_all(years_lookback, table)
447
+ # pick which suggestion to highlight
448
+ row = outs["best_med"]
449
+ if (which_band or "").lower().startswith("low"): row = outs["best_low"]
450
+ if (which_band or "").lower().startswith("high"): row = outs["best_high"]
451
+
452
+ sugg_mu = None; sugg_sigma_hist = None; holdings = empty_holdings_df()
453
+ if isinstance(row, pd.Series) and not row.empty:
 
 
 
454
  sugg_mu = float(row["mu_capm"]); sugg_sigma_hist = float(row["sigma_hist"])
455
+ holdings = _row_to_holdings(row, outs["budget"])
456
 
457
  img = plot_cml_hybrid(
458
  outs["rf_ann"], outs["erp_ann"], outs["sigma_mkt"],
 
460
  outs["mu_eff_same_sigma"], outs["sigma_eff_same_return"],
461
  sugg_mu, sugg_sigma_hist
462
  )
463
+
464
+ # small stats for each band (single pick)
465
def _band_stats(s: pd.Series) -> str:
    """Format one band's pick as a short return/volatility summary.

    Returns an em dash when there is no pick (``None`` or an empty Series);
    otherwise reads ``mu_capm`` and ``sigma_hist`` and renders both as
    percentages with two decimals.
    """
    has_pick = s is not None and not s.empty
    if not has_pick:
        return "—"
    mu_pct = float(s["mu_capm"]) * 100
    sigma_pct = float(s["sigma_hist"]) * 100
    return f"CAPM E[r] {mu_pct:.2f}%, σ(h) {sigma_pct:.2f}%"
468
+
469
+ low_stats = _band_stats(outs["best_low"])
470
+ med_stats = _band_stats(outs["best_med"])
471
+ high_stats = _band_stats(outs["best_high"])
472
+
473
+ return img, outs["info"], outs["uni_msg"], outs["pos_table"], holdings, outs["csv_path"], low_stats, med_stats, high_stats
474
 
475
  # -------------- UI --------------
476
  with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
477
  gr.Markdown(
478
  "## Efficient Portfolio Advisor\n"
479
  "Plot uses **x = historical σ** and **y = CAPM E[r] = rf + β·ERP**. "
480
+ "Efficient (same σ) and (same E[r]) market/bills points are shown.\n\n"
481
+ "**Suggestions:** We re-weight the **same tickers you entered** to produce one Low/Medium/High pick. "
482
+ "Embeddings + exposure similarity select the best pick in each band."
483
  )
484
  with gr.Row():
485
  with gr.Column(scale=1):
 
491
  table = gr.Dataframe(value=pd.DataFrame(columns=["ticker","amount_usd"]), interactive=True)
492
  horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
493
  lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years")
494
+
495
+ gr.Markdown("### Suggestions (one per band)")
496
+ with gr.Row():
497
+ low_btn = gr.Button("Show Low")
498
+ med_btn = gr.Button("Show Medium")
499
+ high_btn = gr.Button("Show High")
500
+ low_txt = gr.Markdown("Low: —")
501
+ med_txt = gr.Markdown("Medium: ")
502
+ high_txt = gr.Markdown("High: ")
503
+
 
 
504
  run_btn = gr.Button("Compute (build dataset & suggest)")
505
  with gr.Column(scale=1):
506
  plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
 
511
  label="Selected suggestion holdings (% / $)")
512
  dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
513
 
514
+ # wire search / add / locking / horizon
515
  search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
516
  add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
517
  table.change(fn=lock_ticker_column, inputs=table, outputs=table)
518
  horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
519
 
520
+ # main compute (defaults to Medium highlighted)
521
  run_btn.click(
522
+ fn=render_with_band,
523
+ inputs=[lookback, table, gr.State("Medium")],
524
+ outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
525
  )
526
+
527
+ # choose band with buttons
528
+ low_btn.click(
529
+ fn=render_with_band,
530
+ inputs=[lookback, table, gr.State("Low")],
531
+ outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
532
  )
533
+ med_btn.click(
534
+ fn=render_with_band,
535
+ inputs=[lookback, table, gr.State("Medium")],
536
+ outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
537
  )
538
+ high_btn.click(
539
+ fn=render_with_band,
540
+ inputs=[lookback, table, gr.State("High")],
541
+ outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
542
  )
543
 
544
+ # initialize risk-free at launch
545
  RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
546
  RF_ANN = fetch_fred_yield_annual(RF_CODE)
547
 
548
  if __name__ == "__main__":
549
+ demo.queue() # no concurrency_count to keep compatibility with older Gradio
550
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), show_api=False, share=False)