Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py (CML-safe
|
| 2 |
import os, io, math, time, warnings
|
| 3 |
warnings.filterwarnings("ignore")
|
| 4 |
|
|
@@ -22,8 +22,8 @@ MARKET_TICKER = "VOO"
|
|
| 22 |
|
| 23 |
SYNTH_ROWS = 1000
|
| 24 |
EMBED_MODEL_NAME = "FinLang/finance-embeddings-investopedia"
|
| 25 |
-
EMBED_ALPHA = 0.6
|
| 26 |
-
MMR_LAMBDA = 0.7
|
| 27 |
|
| 28 |
HORIZON_YEARS = 10
|
| 29 |
RF_CODE = "DGS10"
|
|
@@ -126,7 +126,7 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
|
|
| 126 |
betas[s] = cov_sm / var_m
|
| 127 |
betas[MARKET_TICKER] = 1.0
|
| 128 |
|
| 129 |
-
#
|
| 130 |
cov_all_ann = pd.DataFrame(np.cov(R.values.T, ddof=1) * 12.0,
|
| 131 |
index=R.columns, columns=R.columns)
|
| 132 |
|
|
@@ -186,19 +186,19 @@ def plot_cml_hybrid(rf_ann, erp_ann, sigma_mkt,
|
|
| 186 |
buf = io.BytesIO(); plt.savefig(buf, format="png"); plt.close(fig); buf.seek(0)
|
| 187 |
return Image.open(buf)
|
| 188 |
|
| 189 |
-
# -------------- synthetic dataset (
|
| 190 |
-
def
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
| 196 |
rng = np.random.default_rng(12345)
|
| 197 |
-
|
|
|
|
| 198 |
rows = []
|
| 199 |
for _ in range(n_rows):
|
| 200 |
-
k = int(rng.integers(low=2, high=min(8, len(universe)) + 1))
|
| 201 |
-
picks = list(rng.choice(universe, size=k, replace=False))
|
| 202 |
w = rng.dirichlet(np.ones(k))
|
| 203 |
beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
|
| 204 |
mu_capm = capm_er(beta_p, rf_ann, erp_ann)
|
|
@@ -219,16 +219,7 @@ def _band_bounds_sigma_hist(sigma_mkt: float, band: str) -> Tuple[float, float]:
|
|
| 219 |
if band.startswith("high"): return 1.2 * sigma_mkt, 3.0 * sigma_mkt
|
| 220 |
return 0.8 * sigma_mkt, 1.2 * sigma_mkt
|
| 221 |
|
| 222 |
-
|
| 223 |
-
if df.empty: return pd.DataFrame(columns=["pick","CAPM E[r] %","σ (hist) %","tickers"])
|
| 224 |
-
out = df.copy()
|
| 225 |
-
out = out.assign(**{"CAPM E[r] %": (out["mu_capm"]*100).round(2),
|
| 226 |
-
"σ (hist) %": (out["sigma_hist"]*100).round(2),
|
| 227 |
-
"tickers": out["tickers"]})[["CAPM E[r] %","σ (hist) %","tickers"]]
|
| 228 |
-
out = out.reset_index(drop=True); out.insert(0, "pick", [1,2,3][:len(out)])
|
| 229 |
-
return out
|
| 230 |
-
|
| 231 |
-
# -------------- embeddings & re-ranking --------------
|
| 232 |
_EMBED_MODEL = None
|
| 233 |
_TICKER_EMBED_CACHE: Dict[str, np.ndarray] = {}
|
| 234 |
|
|
@@ -272,48 +263,38 @@ def _exposure_similarity(user_map: Dict[str,float], cand_map: Dict[str,float]) -
|
|
| 272 |
c = {k:abs(v)/s_c for k,v in cand_map.items()}
|
| 273 |
common = set(u)&set(c); return float(sum(min(u[t],c[t]) for t in common))
|
| 274 |
|
| 275 |
-
def
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
try:
|
|
|
|
|
|
|
|
|
|
| 278 |
u_t = user_df["ticker"].astype(str).str.upper().tolist()
|
| 279 |
u_w = pd.to_numeric(user_df["amount_usd"], errors="coerce").fillna(0.0).tolist()
|
| 280 |
u_map = {t: float(w) for t, w in zip(u_t, u_w)}
|
| 281 |
u_embed = _portfolio_embedding(u_t, u_w)
|
| 282 |
|
| 283 |
-
|
| 284 |
for _, r in band_df.iterrows():
|
| 285 |
ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
|
| 286 |
ws = [float(x) for x in str(r["weights"]).split(",")]
|
| 287 |
s = sum(max(0.0,w) for w in ws) or 1.0
|
| 288 |
ws = [max(0.0,w)/s for w in ws]
|
| 289 |
c_map = {t:w for t,w in zip(ts,ws)}
|
| 290 |
-
c_embed = _portfolio_embedding(ts, ws)
|
| 291 |
expo_sim = _exposure_similarity(u_map, c_map)
|
| 292 |
emb_sim = _cos_sim(u_embed, c_embed)
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
cand_embeds = np.stack(cand_embeds, axis=0)
|
| 299 |
-
order = np.argsort([-s for s,_ in cand_rows])
|
| 300 |
-
picked = []; picked_idx = []
|
| 301 |
-
for i in order:
|
| 302 |
-
if len(picked)>=top_k: break
|
| 303 |
-
s_i, row_i = cand_rows[i]
|
| 304 |
-
if not picked:
|
| 305 |
-
picked.append(row_i); picked_idx.append(i); continue
|
| 306 |
-
sim_to_picked = max(_cos_sim(cand_embeds[i], cand_embeds[j]) for j in picked_idx)
|
| 307 |
-
mmr = mmr_lambda*s_i - (1.0-mmr_lambda)*sim_to_picked # noqa: F841 (kept for clarity)
|
| 308 |
-
picked.append(row_i); picked_idx.append(i)
|
| 309 |
-
out = pd.DataFrame([r for r in picked]).drop_duplicates().head(top_k).reset_index(drop=True)
|
| 310 |
-
if out.empty: out = band_df.head(top_k).reset_index(drop=True)
|
| 311 |
-
out.insert(0,"pick",[1,2,3][:len(out)])
|
| 312 |
-
return out
|
| 313 |
except Exception:
|
| 314 |
-
|
| 315 |
-
out.insert(0,"pick",[1,2,3][:len(out)])
|
| 316 |
-
return out
|
| 317 |
|
| 318 |
# -------------- UI helpers --------------
|
| 319 |
def empty_positions_df():
    """Return an empty positions table that carries only the column headers."""
    column_names = ["ticker", "amount_usd", "weight_exposure", "beta"]
    return pd.DataFrame(columns=column_names)
|
|
@@ -364,7 +345,8 @@ def lock_ticker_column(tb: Optional[pd.DataFrame]):
|
|
| 364 |
# -------------- compute core --------------
|
| 365 |
UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
|
| 366 |
|
| 367 |
-
def
|
|
|
|
| 368 |
ts = [t.strip().upper() for t in str(row["tickers"]).split(",")]
|
| 369 |
ws = [float(x) for x in str(row["weights"]).split(",")]
|
| 370 |
s = sum(max(0.0,w) for w in ws) or 1.0
|
|
@@ -372,20 +354,23 @@ def _pick_to_holdings(row: pd.Series, budget: float) -> pd.DataFrame:
|
|
| 372 |
return pd.DataFrame([{"ticker": t, "weight_%": round(w*100,2), "amount_$": round(w*budget,0)} for t,w in zip(ts,ws)],
|
| 373 |
columns=["ticker","weight_%","amount_$"])
|
| 374 |
|
| 375 |
-
def compute_all(years_lookback: int, table: Optional[pd.DataFrame]
|
|
|
|
| 376 |
df = table.copy() if isinstance(table,pd.DataFrame) else pd.DataFrame(columns=["ticker","amount_usd"])
|
| 377 |
df = df.dropna(how="all")
|
| 378 |
if "ticker" not in df.columns: df["ticker"] = []
|
| 379 |
if "amount_usd" not in df.columns: df["amount_usd"] = []
|
| 380 |
df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
|
| 381 |
df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
|
|
|
|
| 382 |
symbols = [t for t in df["ticker"].tolist() if t]
|
| 383 |
if len(symbols)==0: raise gr.Error("Add at least one ticker.")
|
| 384 |
symbols = validate_tickers(symbols, years_lookback)
|
| 385 |
if len(symbols)==0: raise gr.Error("Could not validate any tickers.")
|
| 386 |
|
| 387 |
global UNIVERSE
|
| 388 |
-
|
|
|
|
| 389 |
|
| 390 |
df = df[df["ticker"].isin(symbols)].copy()
|
| 391 |
amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
|
|
@@ -403,30 +388,29 @@ def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddin
|
|
| 403 |
a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
|
| 404 |
a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
|
| 405 |
|
| 406 |
-
|
|
|
|
| 407 |
csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
|
| 408 |
try: synth.to_csv(csv_path, index=False)
|
| 409 |
except Exception: csv_path = None
|
| 410 |
|
| 411 |
-
|
|
|
|
| 412 |
lo, hi = _band_bounds_sigma_hist(sigma_mkt, band)
|
| 413 |
-
|
| 414 |
-
if
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
return top3
|
| 422 |
-
|
| 423 |
-
top3_low, top3_med, top3_high = band_top3("Low"), band_top3("Medium"), band_top3("High")
|
| 424 |
-
low_sum, med_sum, high_sum = _summarize_three(top3_low), _summarize_three(top3_med), _summarize_three(top3_high)
|
| 425 |
|
|
|
|
| 426 |
pos_table = pd.DataFrame([{
|
| 427 |
"ticker": t, "amount_usd": amounts.get(t,0.0),
|
| 428 |
"weight_exposure": weights.get(t,0.0),
|
| 429 |
-
"beta":
|
| 430 |
} for t in symbols], columns=["ticker","amount_usd","weight_exposure","beta"])
|
| 431 |
|
| 432 |
info = "\n".join([
|
|
@@ -446,6 +430,7 @@ def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddin
|
|
| 446 |
f"- Same σ as your portfolio: Market {a_sigma:.2f}, Bills {b_sigma:.2f} → E[r] {mu_eff_sigma:.2%}",
|
| 447 |
f"- Same E[r] as your portfolio: Market {a_mu:.2f}, Bills {b_mu:.2f} → σ {sigma_eff_mu:.2%}",
|
| 448 |
"",
|
|
|
|
| 449 |
"_All points are guaranteed on/under the CML because σ uses the full covariance (incl. market)._"
|
| 450 |
])
|
| 451 |
|
|
@@ -454,23 +439,20 @@ def compute_all(years_lookback: int, table: Optional[pd.DataFrame], use_embeddin
|
|
| 454 |
mu_capm=mu_capm, sigma_hist=sigma_hist,
|
| 455 |
mu_eff_same_sigma=mu_eff_sigma, sigma_eff_same_return=sigma_eff_mu,
|
| 456 |
pos_table=pos_table, info=info, uni_msg=uni_msg, csv_path=csv_path,
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
if
|
| 469 |
-
sugg_mu = None; sugg_sigma_hist = None; holdings = empty_holdings_df()
|
| 470 |
-
else:
|
| 471 |
-
row = top3.iloc[min(idx, len(top3)-1)]
|
| 472 |
sugg_mu = float(row["mu_capm"]); sugg_sigma_hist = float(row["sigma_hist"])
|
| 473 |
-
holdings =
|
| 474 |
|
| 475 |
img = plot_cml_hybrid(
|
| 476 |
outs["rf_ann"], outs["erp_ann"], outs["sigma_mkt"],
|
|
@@ -478,16 +460,26 @@ def compute_and_render(years_lookback: int, table: Optional[pd.DataFrame], use_e
|
|
| 478 |
outs["mu_eff_same_sigma"], outs["sigma_eff_same_return"],
|
| 479 |
sugg_mu, sugg_sigma_hist
|
| 480 |
)
|
| 481 |
-
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
|
| 484 |
# -------------- UI --------------
|
| 485 |
with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
|
| 486 |
gr.Markdown(
|
| 487 |
"## Efficient Portfolio Advisor\n"
|
| 488 |
"Plot uses **x = historical σ** and **y = CAPM E[r] = rf + β·ERP**. "
|
| 489 |
-
"Efficient (same σ) and (same E[r]) market/bills points are shown
|
| 490 |
-
"Suggestions
|
|
|
|
| 491 |
)
|
| 492 |
with gr.Row():
|
| 493 |
with gr.Column(scale=1):
|
|
@@ -499,18 +491,16 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
|
|
| 499 |
table = gr.Dataframe(value=pd.DataFrame(columns=["ticker","amount_usd"]), interactive=True)
|
| 500 |
horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
|
| 501 |
lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years")
|
| 502 |
-
|
| 503 |
-
gr.Markdown("### Suggestions")
|
| 504 |
-
with gr.
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
high_summary = gr.Dataframe(value=empty_holdings_df(), interactive=False, label="Top 3 (High risk)")
|
| 513 |
-
pick_high = gr.Radio(choices=["1","2","3"], value="1", label="Select a pick in High")
|
| 514 |
run_btn = gr.Button("Compute (build dataset & suggest)")
|
| 515 |
with gr.Column(scale=1):
|
| 516 |
plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
|
|
@@ -521,35 +511,40 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
|
|
| 521 |
label="Selected suggestion holdings (% / $)")
|
| 522 |
dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
|
| 523 |
|
|
|
|
| 524 |
search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
|
| 525 |
add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
|
| 526 |
table.change(fn=lock_ticker_column, inputs=table, outputs=table)
|
| 527 |
horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
|
| 528 |
|
|
|
|
| 529 |
run_btn.click(
|
| 530 |
-
fn=
|
| 531 |
-
inputs=[lookback, table,
|
| 532 |
-
outputs=[plot, summary, universe_msg, positions, selected_table, dl,
|
| 533 |
)
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
|
|
|
|
|
|
| 538 |
)
|
| 539 |
-
|
| 540 |
-
fn=
|
| 541 |
-
inputs=[lookback, table,
|
| 542 |
-
outputs=[plot, summary, universe_msg, positions, selected_table, dl,
|
| 543 |
)
|
| 544 |
-
|
| 545 |
-
fn=
|
| 546 |
-
inputs=[lookback, table,
|
| 547 |
-
outputs=[plot, summary, universe_msg, positions, selected_table, dl,
|
| 548 |
)
|
| 549 |
|
|
|
|
| 550 |
RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
|
| 551 |
RF_ANN = fetch_fred_yield_annual(RF_CODE)
|
| 552 |
|
| 553 |
if __name__ == "__main__":
|
| 554 |
-
demo.queue()
|
| 555 |
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), show_api=False, share=False)
|
|
|
|
| 1 |
+
# app.py — Efficient Portfolio Advisor (CML-safe, same-ticker suggestions, embeddings always on)
|
| 2 |
import os, io, math, time, warnings
|
| 3 |
warnings.filterwarnings("ignore")
|
| 4 |
|
|
|
|
| 22 |
|
| 23 |
SYNTH_ROWS = 1000
|
| 24 |
EMBED_MODEL_NAME = "FinLang/finance-embeddings-investopedia"
|
| 25 |
+
EMBED_ALPHA = 0.6 # exposure similarity weight
|
| 26 |
+
MMR_LAMBDA = 0.7 # diversity for re-ranking (kept for consistency even though we output 1 per band)
|
| 27 |
|
| 28 |
HORIZON_YEARS = 10
|
| 29 |
RF_CODE = "DGS10"
|
|
|
|
| 126 |
betas[s] = cov_sm / var_m
|
| 127 |
betas[MARKET_TICKER] = 1.0
|
| 128 |
|
| 129 |
+
# Full covariance including MARKET_TICKER
|
| 130 |
cov_all_ann = pd.DataFrame(np.cov(R.values.T, ddof=1) * 12.0,
|
| 131 |
index=R.columns, columns=R.columns)
|
| 132 |
|
|
|
|
| 186 |
buf = io.BytesIO(); plt.savefig(buf, format="png"); plt.close(fig); buf.seek(0)
|
| 187 |
return Image.open(buf)
|
| 188 |
|
| 189 |
+
# -------------- synthetic dataset (re-weights of SAME tickers) --------------
|
| 190 |
+
def build_synthetic_dataset_same_tickers(tickers: List[str],
|
| 191 |
+
cov_all_ann: pd.DataFrame,
|
| 192 |
+
betas: Dict[str, float],
|
| 193 |
+
rf_ann: float,
|
| 194 |
+
erp_ann: float,
|
| 195 |
+
n_rows: int = SYNTH_ROWS) -> pd.DataFrame:
|
| 196 |
+
"""Generate long-only Dirichlet weights over EXACTLY the user's tickers."""
|
| 197 |
rng = np.random.default_rng(12345)
|
| 198 |
+
picks = [t for t in tickers] # fixed set
|
| 199 |
+
k = len(picks)
|
| 200 |
rows = []
|
| 201 |
for _ in range(n_rows):
|
|
|
|
|
|
|
| 202 |
w = rng.dirichlet(np.ones(k))
|
| 203 |
beta_p = float(np.dot([betas.get(t, 0.0) for t in picks], w))
|
| 204 |
mu_capm = capm_er(beta_p, rf_ann, erp_ann)
|
|
|
|
| 219 |
if band.startswith("high"): return 1.2 * sigma_mkt, 3.0 * sigma_mkt
|
| 220 |
return 0.8 * sigma_mkt, 1.2 * sigma_mkt
|
| 221 |
|
| 222 |
+
# -------------- embeddings & scoring --------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
_EMBED_MODEL = None
|
| 224 |
_TICKER_EMBED_CACHE: Dict[str, np.ndarray] = {}
|
| 225 |
|
|
|
|
| 263 |
c = {k:abs(v)/s_c for k,v in cand_map.items()}
|
| 264 |
common = set(u)&set(c); return float(sum(min(u[t],c[t]) for t in common))
|
| 265 |
|
| 266 |
+
def pick_best_in_band(user_df: pd.DataFrame, band_df: pd.DataFrame,
                      alpha: float = EMBED_ALPHA, top_N: int = 50) -> pd.Series:
    """
    Return the single candidate row of ``band_df`` most similar to the user's portfolio.

    Candidates are first narrowed to the ``top_N`` rows with the highest
    ``mu_capm``; each survivor is then scored as
    ``alpha * exposure_similarity + (1 - alpha) * embedding_similarity``.

    Parameters
    ----------
    user_df : DataFrame with ``ticker`` and ``amount_usd`` columns.
    band_df : DataFrame with ``mu_capm``, ``tickers`` and ``weights`` columns,
        where ``tickers`` / ``weights`` are comma-separated strings.
    alpha : weight of the exposure similarity in the blended score.
    top_N : number of highest-return candidates to score; values below 1 are
        treated as 1 so that at least one candidate is always considered.

    Returns
    -------
    An empty float64 Series when ``band_df`` is empty; otherwise the
    best-scoring row. If scoring fails for any reason, the failure is logged
    and the first row of the current candidate frame is returned instead.
    """
    import logging  # local import: the file's top-level import block is not fully visible here

    if band_df.empty:
        return pd.Series(dtype="float64")
    try:
        # Restrict to strong candidates by return first. The max(1, ...) guard
        # keeps the frame non-empty, so the fallback below can always index row 0.
        band_df = (band_df.sort_values("mu_capm", ascending=False)
                   .head(max(1, int(top_N)))
                   .reset_index(drop=True))

        u_t = user_df["ticker"].astype(str).str.upper().tolist()
        u_w = pd.to_numeric(user_df["amount_usd"], errors="coerce").fillna(0.0).tolist()
        u_map = {t: float(w) for t, w in zip(u_t, u_w)}
        u_embed = _portfolio_embedding(u_t, u_w)

        scores = []
        for _, r in band_df.iterrows():
            ts = [t.strip().upper() for t in str(r["tickers"]).split(",")]
            ws = [float(x) for x in str(r["weights"]).split(",")]
            # Clip negatives and renormalize; "or 1.0" avoids dividing by zero
            # when every weight is non-positive.
            s = sum(max(0.0, w) for w in ws) or 1.0
            ws = [max(0.0, w) / s for w in ws]
            c_map = {t: w for t, w in zip(ts, ws)}
            c_embed = _portfolio_embedding(ts, ws)
            expo_sim = _exposure_similarity(u_map, c_map)
            emb_sim = _cos_sim(u_embed, c_embed)
            scores.append(alpha * expo_sim + (1.0 - alpha) * emb_sim)

        # np.argmax treats NaN as the maximum, so one undefined similarity
        # would win the band. Demote non-finite scores so they can never be
        # selected over a real one.
        # NOTE(review): whether _cos_sim can actually yield NaN is not visible here.
        score_arr = np.asarray(scores, dtype=float)
        score_arr[~np.isfinite(score_arr)] = -np.inf
        i_best = int(np.argmax(score_arr))
        return band_df.iloc[i_best]
    except Exception:
        # Deliberate best-effort: fall back to the first candidate row, but
        # leave a trace instead of swallowing the failure silently.
        logging.getLogger(__name__).warning(
            "pick_best_in_band: similarity scoring failed; falling back to first candidate",
            exc_info=True,
        )
        return band_df.iloc[0]
|
|
|
|
|
|
|
| 298 |
|
| 299 |
# -------------- UI helpers --------------
|
| 300 |
def empty_positions_df():
    """Return an empty positions table that carries only the column headers."""
    column_names = ["ticker", "amount_usd", "weight_exposure", "beta"]
    return pd.DataFrame(columns=column_names)
|
|
|
|
| 345 |
# -------------- compute core --------------
|
| 346 |
UNIVERSE: List[str] = [MARKET_TICKER, "QQQ", "VTI", "SOXX", "IBIT"]
|
| 347 |
|
| 348 |
+
def _row_to_holdings(row: pd.Series, budget: float) -> pd.DataFrame:
|
| 349 |
+
if row is None or row.empty: return empty_holdings_df()
|
| 350 |
ts = [t.strip().upper() for t in str(row["tickers"]).split(",")]
|
| 351 |
ws = [float(x) for x in str(row["weights"]).split(",")]
|
| 352 |
s = sum(max(0.0,w) for w in ws) or 1.0
|
|
|
|
| 354 |
return pd.DataFrame([{"ticker": t, "weight_%": round(w*100,2), "amount_$": round(w*budget,0)} for t,w in zip(ts,ws)],
|
| 355 |
columns=["ticker","weight_%","amount_$"])
|
| 356 |
|
| 357 |
+
def compute_all(years_lookback: int, table: Optional[pd.DataFrame]):
|
| 358 |
+
# sanitize input table
|
| 359 |
df = table.copy() if isinstance(table,pd.DataFrame) else pd.DataFrame(columns=["ticker","amount_usd"])
|
| 360 |
df = df.dropna(how="all")
|
| 361 |
if "ticker" not in df.columns: df["ticker"] = []
|
| 362 |
if "amount_usd" not in df.columns: df["amount_usd"] = []
|
| 363 |
df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
|
| 364 |
df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
|
| 365 |
+
|
| 366 |
symbols = [t for t in df["ticker"].tolist() if t]
|
| 367 |
if len(symbols)==0: raise gr.Error("Add at least one ticker.")
|
| 368 |
symbols = validate_tickers(symbols, years_lookback)
|
| 369 |
if len(symbols)==0: raise gr.Error("Could not validate any tickers.")
|
| 370 |
|
| 371 |
global UNIVERSE
|
| 372 |
+
# Universe is your exact tickers (for suggestions we re-weight SAME tickers)
|
| 373 |
+
UNIVERSE = list(sorted(set(symbols)))[:MAX_TICKERS]
|
| 374 |
|
| 375 |
df = df[df["ticker"].isin(symbols)].copy()
|
| 376 |
amounts = {r["ticker"]: float(r["amount_usd"]) for _, r in df.iterrows()}
|
|
|
|
| 388 |
a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_hist, rf_ann, erp_ann, sigma_mkt)
|
| 389 |
a_mu, b_mu, sigma_eff_mu = efficient_same_return(mu_capm, rf_ann, erp_ann, sigma_mkt)
|
| 390 |
|
| 391 |
+
# dataset: re-weight EXACT same tickers you entered
|
| 392 |
+
synth = build_synthetic_dataset_same_tickers(UNIVERSE, cov_all_ann, betas, rf_ann, erp_ann, n_rows=SYNTH_ROWS)
|
| 393 |
csv_path = os.path.join(DATA_DIR, f"investor_profiles_{int(time.time())}.csv")
|
| 394 |
try: synth.to_csv(csv_path, index=False)
|
| 395 |
except Exception: csv_path = None
|
| 396 |
|
| 397 |
+
# one suggestion per band (best by embedding/exposure score among candidates in band)
|
| 398 |
+
def best_for_band(band: str) -> pd.Series:
|
| 399 |
lo, hi = _band_bounds_sigma_hist(sigma_mkt, band)
|
| 400 |
+
band_df = synth[(synth["sigma_hist"]>=lo) & (synth["sigma_hist"]<=hi)].copy()
|
| 401 |
+
if band_df.empty: band_df = synth.copy()
|
| 402 |
+
user_df = pd.DataFrame({"ticker": list(weights.keys()), "amount_usd": [amounts[t] for t in weights.keys()]})
|
| 403 |
+
return pick_best_in_band(user_df, band_df, EMBED_ALPHA, top_N=50)
|
| 404 |
+
|
| 405 |
+
best_low = best_for_band("Low")
|
| 406 |
+
best_med = best_for_band("Medium")
|
| 407 |
+
best_high = best_for_band("High")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
|
| 409 |
+
# derived displays
|
| 410 |
pos_table = pd.DataFrame([{
|
| 411 |
"ticker": t, "amount_usd": amounts.get(t,0.0),
|
| 412 |
"weight_exposure": weights.get(t,0.0),
|
| 413 |
+
"beta": betas.get(t, np.nan) if t != MARKET_TICKER else 1.0
|
| 414 |
} for t in symbols], columns=["ticker","amount_usd","weight_exposure","beta"])
|
| 415 |
|
| 416 |
info = "\n".join([
|
|
|
|
| 430 |
f"- Same σ as your portfolio: Market {a_sigma:.2f}, Bills {b_sigma:.2f} → E[r] {mu_eff_sigma:.2%}",
|
| 431 |
f"- Same E[r] as your portfolio: Market {a_mu:.2f}, Bills {b_mu:.2f} → σ {sigma_eff_mu:.2%}",
|
| 432 |
"",
|
| 433 |
+
"_Suggestions are single picks per band, re-weighting **the same tickers you entered**, and are chosen via embeddings + exposure similarity._",
|
| 434 |
"_All points are guaranteed on/under the CML because σ uses the full covariance (incl. market)._"
|
| 435 |
])
|
| 436 |
|
|
|
|
| 439 |
mu_capm=mu_capm, sigma_hist=sigma_hist,
|
| 440 |
mu_eff_same_sigma=mu_eff_sigma, sigma_eff_same_return=sigma_eff_mu,
|
| 441 |
pos_table=pos_table, info=info, uni_msg=uni_msg, csv_path=csv_path,
|
| 442 |
+
best_low=best_low, best_med=best_med, best_high=best_high,
|
| 443 |
+
budget=gross)
|
| 444 |
+
|
| 445 |
+
def render_with_band(years_lookback: int, table: Optional[pd.DataFrame], which_band: str):
|
| 446 |
+
outs = compute_all(years_lookback, table)
|
| 447 |
+
# pick which suggestion to highlight
|
| 448 |
+
row = outs["best_med"]
|
| 449 |
+
if (which_band or "").lower().startswith("low"): row = outs["best_low"]
|
| 450 |
+
if (which_band or "").lower().startswith("high"): row = outs["best_high"]
|
| 451 |
+
|
| 452 |
+
sugg_mu = None; sugg_sigma_hist = None; holdings = empty_holdings_df()
|
| 453 |
+
if isinstance(row, pd.Series) and not row.empty:
|
|
|
|
|
|
|
|
|
|
| 454 |
sugg_mu = float(row["mu_capm"]); sugg_sigma_hist = float(row["sigma_hist"])
|
| 455 |
+
holdings = _row_to_holdings(row, outs["budget"])
|
| 456 |
|
| 457 |
img = plot_cml_hybrid(
|
| 458 |
outs["rf_ann"], outs["erp_ann"], outs["sigma_mkt"],
|
|
|
|
| 460 |
outs["mu_eff_same_sigma"], outs["sigma_eff_same_return"],
|
| 461 |
sugg_mu, sugg_sigma_hist
|
| 462 |
)
|
| 463 |
+
|
| 464 |
+
# small stats for each band (single pick)
|
| 465 |
+
def _band_stats(s: pd.Series) -> str:
|
| 466 |
+
if s is None or s.empty: return "—"
|
| 467 |
+
return f"CAPM E[r] {float(s['mu_capm'])*100:.2f}%, σ(h) {float(s['sigma_hist'])*100:.2f}%"
|
| 468 |
+
|
| 469 |
+
low_stats = _band_stats(outs["best_low"])
|
| 470 |
+
med_stats = _band_stats(outs["best_med"])
|
| 471 |
+
high_stats = _band_stats(outs["best_high"])
|
| 472 |
+
|
| 473 |
+
return img, outs["info"], outs["uni_msg"], outs["pos_table"], holdings, outs["csv_path"], low_stats, med_stats, high_stats
|
| 474 |
|
| 475 |
# -------------- UI --------------
|
| 476 |
with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
|
| 477 |
gr.Markdown(
|
| 478 |
"## Efficient Portfolio Advisor\n"
|
| 479 |
"Plot uses **x = historical σ** and **y = CAPM E[r] = rf + β·ERP**. "
|
| 480 |
+
"Efficient (same σ) and (same E[r]) market/bills points are shown.\n\n"
|
| 481 |
+
"**Suggestions:** We re-weight the **same tickers you entered** to produce one Low/Medium/High pick. "
|
| 482 |
+
"Embeddings + exposure similarity select the best pick in each band."
|
| 483 |
)
|
| 484 |
with gr.Row():
|
| 485 |
with gr.Column(scale=1):
|
|
|
|
| 491 |
table = gr.Dataframe(value=pd.DataFrame(columns=["ticker","amount_usd"]), interactive=True)
|
| 492 |
horizon = gr.Number(label="Horizon in years (1–100)", value=HORIZON_YEARS, precision=0)
|
| 493 |
lookback = gr.Slider(1, 15, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years")
|
| 494 |
+
|
| 495 |
+
gr.Markdown("### Suggestions (one per band)")
|
| 496 |
+
with gr.Row():
|
| 497 |
+
low_btn = gr.Button("Show Low")
|
| 498 |
+
med_btn = gr.Button("Show Medium")
|
| 499 |
+
high_btn = gr.Button("Show High")
|
| 500 |
+
low_txt = gr.Markdown("Low: —")
|
| 501 |
+
med_txt = gr.Markdown("Medium: —")
|
| 502 |
+
high_txt = gr.Markdown("High: —")
|
| 503 |
+
|
|
|
|
|
|
|
| 504 |
run_btn = gr.Button("Compute (build dataset & suggest)")
|
| 505 |
with gr.Column(scale=1):
|
| 506 |
plot = gr.Image(label="Capital Market Line (CAPM)", type="pil")
|
|
|
|
| 511 |
label="Selected suggestion holdings (% / $)")
|
| 512 |
dl = gr.File(label="Generated dataset CSV", value=None, visible=True)
|
| 513 |
|
| 514 |
+
# wire search / add / locking / horizon
|
| 515 |
search_btn.click(fn=search_tickers_cb, inputs=q, outputs=[search_note, matches])
|
| 516 |
add_btn.click(fn=add_symbol, inputs=[matches, table], outputs=[table, search_note])
|
| 517 |
table.change(fn=lock_ticker_column, inputs=table, outputs=table)
|
| 518 |
horizon.change(fn=set_horizon, inputs=horizon, outputs=universe_msg)
|
| 519 |
|
| 520 |
+
# main compute (defaults to Medium highlighted)
|
| 521 |
run_btn.click(
|
| 522 |
+
fn=render_with_band,
|
| 523 |
+
inputs=[lookback, table, gr.State("Medium")],
|
| 524 |
+
outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
|
| 525 |
)
|
| 526 |
+
|
| 527 |
+
# choose band with buttons
|
| 528 |
+
low_btn.click(
|
| 529 |
+
fn=render_with_band,
|
| 530 |
+
inputs=[lookback, table, gr.State("Low")],
|
| 531 |
+
outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
|
| 532 |
)
|
| 533 |
+
med_btn.click(
|
| 534 |
+
fn=render_with_band,
|
| 535 |
+
inputs=[lookback, table, gr.State("Medium")],
|
| 536 |
+
outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
|
| 537 |
)
|
| 538 |
+
high_btn.click(
|
| 539 |
+
fn=render_with_band,
|
| 540 |
+
inputs=[lookback, table, gr.State("High")],
|
| 541 |
+
outputs=[plot, summary, universe_msg, positions, selected_table, dl, low_txt, med_txt, high_txt]
|
| 542 |
)
|
| 543 |
|
| 544 |
+
# initialize risk-free at launch
|
| 545 |
RF_CODE = fred_series_for_horizon(HORIZON_YEARS)
|
| 546 |
RF_ANN = fetch_fred_yield_annual(RF_CODE)
|
| 547 |
|
| 548 |
if __name__ == "__main__":
|
| 549 |
+
demo.queue() # no concurrency_count to keep compatibility with older Gradio
|
| 550 |
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), show_api=False, share=False)
|