Tulitula committed on
Commit
7e7ea16
·
verified ·
1 Parent(s): ef10717

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -160
app.py CHANGED
@@ -23,9 +23,8 @@ MAX_TICKERS = 30
23
  DEFAULT_LOOKBACK_YEARS = 5
24
  MARKET_TICKER = "VOO"
25
 
26
- # column schemas (weights in percent for UI tables)
27
  POS_COLS = ["ticker", "amount_usd", "weight_%", "beta"]
28
- SUG_COLS = ["ticker", "suggested_weight_%"]
29
 
30
  FRED_MAP = [
31
  (1, "DGS1"),
@@ -46,9 +45,6 @@ def ensure_data_dir():
46
  def empty_positions_df():
47
  return pd.DataFrame(columns=POS_COLS)
48
 
49
- def empty_suggest_df():
50
- return pd.DataFrame(columns=SUG_COLS)
51
-
52
  def fred_series_for_horizon(years: float) -> str:
53
  y = max(1.0, min(100.0, float(years)))
54
  for cutoff, code in FRED_MAP:
@@ -57,7 +53,6 @@ def fred_series_for_horizon(years: float) -> str:
57
  return "DGS30"
58
 
59
  def fetch_fred_yield_annual(code: str) -> float:
60
- # FRED CSV endpoint
61
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
62
  try:
63
  r = requests.get(url, timeout=10)
@@ -69,7 +64,6 @@ def fetch_fred_yield_annual(code: str) -> float:
69
  return 0.03
70
 
71
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
72
- # yfinance docs https://pypi.org/project/yfinance
73
  start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
74
  end = pd.Timestamp.today(tz="UTC")
75
  df = yf.download(
@@ -82,7 +76,7 @@ def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
82
  )["Close"]
83
  if isinstance(df, pd.Series):
84
  df = df.to_frame()
85
- # Normalize column names to simple strings (yfinance can return MultiIndex)
86
  if isinstance(df.columns, pd.MultiIndex):
87
  df.columns = [c[-1] if isinstance(c, tuple) else str(c) for c in df.columns]
88
  else:
@@ -93,14 +87,10 @@ def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
93
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
94
  return prices.pct_change().dropna()
95
 
96
- def annualize_mean(m):
97
- return np.asarray(m, dtype=float) * 12.0
98
-
99
- def annualize_sigma(s):
100
- return np.asarray(s, dtype=float) * math.sqrt(12.0)
101
 
102
  def yahoo_search(query: str):
103
- # Yahoo symbol search
104
  if not query or len(query.strip()) == 0:
105
  return []
106
  url = "https://query1.finance.yahoo.com/v1/finance/search"
@@ -133,16 +123,13 @@ def validate_tickers(symbols: List[str], years: int) -> List[str]:
133
  # -------------- aligned moments --------------
134
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
135
  uniq = [c for c in dict.fromkeys(symbols)]
136
- # Always append market for alignment
137
  if MARKET_TICKER not in uniq:
138
  uniq = uniq + [MARKET_TICKER]
139
  px = fetch_prices_monthly(uniq, years)
140
  rets = monthly_returns(px)
141
  cols = [c for c in uniq if c in rets.columns]
142
  R = rets[cols].dropna(how="any")
143
- # Ensure unique simple columns
144
- R = R.loc[:, ~R.columns.duplicated()]
145
- return R
146
 
147
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
148
  R = get_aligned_monthly_returns(symbols, years)
@@ -170,7 +157,7 @@ def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
170
  elif ex_s is not None:
171
  betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
172
 
173
- # IMPORTANT: include market in covariance to avoid under-estimating risk when VOO is held
174
  asset_cols = list(R.columns)
175
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
176
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
@@ -232,7 +219,6 @@ def plot_cml(
232
  cml = rf_ann + slope * xs
233
  plt.plot(xs, cml, label="CML through VOO")
234
 
235
- # Key points
236
  plt.scatter([0.0], [rf_ann], label="Risk free")
237
  plt.scatter([sigma_mkt], [rf_ann + erp_ann], label="Market VOO")
238
  plt.scatter([pt_sigma], [pt_mu], label="Your portfolio")
@@ -241,46 +227,23 @@ def plot_cml(
241
  if targ_sigma is not None and targ_mu is not None:
242
  plt.scatter([targ_sigma], [targ_mu], label="Target suggestion")
243
 
244
- # Dotted guides and annotations that show the gap
245
- # Same sigma guide, vertical gap in return
246
- plt.plot(
247
- [pt_sigma, same_sigma_sigma],
248
- [pt_mu, same_sigma_mu],
249
- linestyle="--",
250
- linewidth=1.2,
251
- alpha=0.7,
252
- color="gray",
253
- )
254
  d_ret = (same_sigma_mu - pt_mu) * 100.0
255
- plt.annotate(
256
- f"Return gain at same sigma {d_ret:+.2f}%",
257
- xy=(same_sigma_sigma, same_sigma_mu),
258
- xytext=(same_sigma_sigma + 0.02 * xmax, same_sigma_mu),
259
- arrowprops=dict(arrowstyle="->", lw=1.0),
260
- fontsize=9,
261
- va="center",
262
- )
263
 
264
- # Same return guide, horizontal gap in sigma
265
- plt.plot(
266
- [pt_sigma, same_mu_sigma],
267
- [pt_mu, same_mu_mu],
268
- linestyle="--",
269
- linewidth=1.2,
270
- alpha=0.7,
271
- color="gray",
272
- )
273
  d_sig = (same_mu_sigma - pt_sigma) * 100.0
274
- plt.annotate(
275
- f"Risk change at same return {d_sig:+.2f}%",
276
- xy=(same_mu_sigma, same_mu_mu),
277
- xytext=(same_mu_sigma, same_mu_mu + 0.03),
278
- arrowprops=dict(arrowstyle="->", lw=1.0),
279
- fontsize=9,
280
- ha="center",
281
- )
282
 
283
- # Percent axes
284
  ax = plt.gca()
285
  ax.yaxis.set_major_formatter(PercentFormatter(1.0))
286
  ax.xaxis.set_major_formatter(PercentFormatter(1.0))
@@ -295,7 +258,7 @@ def plot_cml(
295
  buf.seek(0)
296
  return Image.open(buf)
297
 
298
- # -------------- synthetic dataset --------------
299
  def synth_profile(seed: int) -> str:
300
  rng = np.random.default_rng(seed)
301
  risk = rng.choice(["cautious", "balanced", "moderate", "growth", "aggressive"])
@@ -331,7 +294,6 @@ def save_synth_csv(df: pd.DataFrame, path: str = DATASET_PATH):
331
  os.makedirs(os.path.dirname(path), exist_ok=True)
332
  df.to_csv(path, index=False)
333
 
334
- # ----------- surrogate from saved CSV only -----------
335
  def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
336
  try:
337
  ts = [t.strip() for t in str(row["tickers"]).split(",")]
@@ -377,53 +339,7 @@ def predict_from_surrogate(amounts_map: Dict[str, float], universe: List[str],
377
  er_hat, sigma_hat, beta_hat = float(yhat[0]), float(yhat[1]), float(yhat[2])
378
  return er_hat, sigma_hat, beta_hat
379
 
380
- # ----------- target search over synthetic dataset -----------
381
- def target_best_from_synth(csv_path: str,
382
- universe: List[str],
383
- target_mu: Optional[float],
384
- target_sigma: Optional[float]):
385
- try:
386
- df = pd.read_csv(csv_path)
387
- except Exception:
388
- return None
389
-
390
- if target_mu is None and target_sigma is None:
391
- return None
392
-
393
- rows = []
394
- for _, r in df.iterrows():
395
- x = _row_to_exposures(r, universe)
396
- if x is None:
397
- continue
398
- rows.append((x, float(r["er_p"]), float(r["sigma_p"]), float(r["beta_p"]), r))
399
-
400
- if not rows:
401
- return None
402
-
403
- mu_w = 1.0
404
- sig_w = 1.0
405
- best = None
406
- best_d = float("inf")
407
- for x, er_p, sig_p, beta_p, r in rows:
408
- d = 0.0
409
- if target_mu is not None:
410
- d += mu_w * (er_p - target_mu) ** 2
411
- if target_sigma is not None:
412
- d += sig_w * (sig_p - target_sigma) ** 2
413
- if d < best_d:
414
- best_d = d
415
- best = (x, er_p, sig_p, beta_p, r)
416
-
417
- if best is None:
418
- return None
419
-
420
- x, er_p, sig_p, beta_p, r = best
421
- wmap = {t: float(x[i]) for i, t in enumerate(universe) if abs(float(x[i])) > 1e-4}
422
- top = sorted(wmap.items(), key=lambda kv: -abs(kv[1]))[:12]
423
- wmap_top = dict(top)
424
- return {"weights": wmap_top, "er": er_p, "sigma": sig_p, "beta": beta_p}
425
-
426
- # -------------- summary builder --------------
427
  def fmt_pct(x: float) -> str:
428
  return f"{x*100:.2f}%"
429
 
@@ -435,18 +351,17 @@ def humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta):
435
  parts.append(f"- Predicted annual return {fmt_pct(er_hat)} , difference {fmt_pct(dmu)}")
436
  parts.append(f"- Predicted annual volatility {fmt_pct(sigma_hat)} , difference {fmt_pct(dsig)}")
437
  parts.append(f"- Predicted beta {beta_hat:.2f} , difference {dbeta:+.02f}")
438
- if close_mu and close_sig and close_beta:
439
- verdict = "The synthetic model matches the historical calculation closely. You can trust these quick predictions for similar mixes."
440
- else:
441
- verdict = "The synthetic model is not very close here. Rely more on the historical calculation for this mix."
442
  return "\n".join(parts + ["", f"**Verdict** {verdict}"])
443
 
444
  def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
445
  beta_p, er_p, sigma_p,
446
  a_sigma, b_sigma, mu_eff_sigma,
447
  a_mu, b_mu, sigma_eff_mu,
448
- synth=None, synth_nrows: int = 0,
449
- targ=None) -> str:
450
  lines = []
451
  lines.append("### Inputs")
452
  lines.append(f"- Lookback years {lookback}")
@@ -465,14 +380,6 @@ def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
465
  lines.append("### Synthetic prediction from data/investor_profiles.csv")
466
  lines.append(f"- Samples used {synth_nrows}")
467
  lines.append(humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta))
468
- if targ is not None:
469
- lines.append("")
470
- lines.append("### Target driven suggestion from synthetic dataset")
471
- lines.append(f"- Suggested expected return {fmt_pct(targ['er'])}")
472
- lines.append(f"- Suggested sigma {fmt_pct(targ['sigma'])}")
473
- lines.append(f"- Suggested beta {targ['beta']:.2f}")
474
- pretty = ", ".join([f"{k} {v*100:+.2f}%" for k, v in targ["weights"].items()])
475
- lines.append(f"- Weights, exposure terms {pretty}")
476
  lines.append("")
477
  lines.append("### Efficient alternatives on CML")
478
  lines.append("Efficient same sigma")
@@ -539,20 +446,18 @@ def set_horizon(years: float):
539
  RF_ANN = rf
540
  return f"Risk free series {code}. Latest annual rate {rf:.2%}. Dataset will use this rate on compute."
541
 
542
- def compute(years_lookback: int, table: pd.DataFrame,
543
- target_mu: Optional[float], target_sigma: Optional[float],
544
- use_synth: bool):
545
  df = table.dropna()
546
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
547
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
548
 
549
  symbols = [t for t in df["ticker"].tolist() if t]
550
  if len(symbols) == 0:
551
- return None, "Add at least one ticker", "Universe empty", empty_positions_df(), empty_suggest_df(), None
552
 
553
  symbols = validate_tickers(symbols, years_lookback)
554
  if len(symbols) == 0:
555
- return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), empty_suggest_df(), None
556
 
557
  global UNIVERSE
558
  UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
@@ -566,7 +471,7 @@ def compute(years_lookback: int, table: pd.DataFrame,
566
 
567
  gross = sum(abs(v) for v in amounts.values())
568
  if gross == 0:
569
- return None, "All amounts are zero", "Universe ok", empty_positions_df(), empty_suggest_df(), None
570
  weights = {k: v / gross for k, v in amounts.items()}
571
 
572
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
@@ -574,7 +479,7 @@ def compute(years_lookback: int, table: pd.DataFrame,
574
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
575
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
576
 
577
- # ensure dataset exists once
578
  if not os.path.exists(DATASET_PATH):
579
  synth_df = build_synthetic_dataset(
580
  universe=list(sorted(set(symbols + [MARKET_TICKER]))),
@@ -598,27 +503,12 @@ def compute(years_lookback: int, table: pd.DataFrame,
598
  er_hat - er_p, sigma_hat - sigma_p, beta_hat - beta_p
599
  )
600
 
601
- # target driven suggestion from synthetic dataset
602
- targ = None
603
- targ_table = empty_suggest_df()
604
- targ_sigma_plot = None
605
- targ_mu_plot = None
606
- if csv_path and (target_mu is not None or target_sigma is not None):
607
- cand = target_best_from_synth(csv_path, UNIVERSE, target_mu, target_sigma)
608
- if cand is not None:
609
- targ = cand
610
- targ_sigma_plot = cand["sigma"]
611
- targ_mu_plot = cand["er"]
612
- rows = [{"ticker": k, "suggested_weight_%": v * 100.0}
613
- for k, v in cand["weights"].items()]
614
- targ_table = pd.DataFrame(rows, columns=SUG_COLS)
615
-
616
  img = plot_cml(
617
  rf_ann, erp_ann, sigma_mkt,
618
  sigma_p, er_p,
619
  sigma_p, mu_eff_sigma,
620
  sigma_eff_mu, er_p,
621
- targ_sigma=targ_sigma_plot, targ_mu=targ_mu_plot
622
  )
623
 
624
  info = build_summary_md(
@@ -626,8 +516,7 @@ def compute(years_lookback: int, table: pd.DataFrame,
626
  beta_p, er_p, sigma_p,
627
  a_sigma, b_sigma, mu_eff_sigma,
628
  a_mu, b_mu, sigma_eff_mu,
629
- synth=synth_tuple, synth_nrows=nrows,
630
- targ=targ
631
  )
632
 
633
  rows = []
@@ -642,7 +531,7 @@ def compute(years_lookback: int, table: pd.DataFrame,
642
  pos_table = pd.DataFrame(rows, columns=POS_COLS)
643
 
644
  uni_msg = f"Universe set to {', '.join(UNIVERSE)}"
645
- return img, info, uni_msg, pos_table, targ_table, csv_path
646
 
647
  # -------------- UI --------------
648
  ensure_data_dir()
@@ -673,10 +562,7 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
673
  horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
674
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta & sigma")
675
 
676
- gr.Markdown("### Optional targets on the CML")
677
- target_mu = gr.Number(label="Target expected return (annual, e.g. 0.12 = 12%)", value=None, precision=6)
678
- target_sigma = gr.Number(label="Target sigma (annual, e.g. 0.18 = 18%)", value=None, precision=6)
679
- use_synth = gr.Checkbox(label="Use synthetic predictor", value=True)
680
 
681
  run_btn = gr.Button("Compute and suggest")
682
  with gr.Column(scale=1):
@@ -691,15 +577,7 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
691
  value=empty_positions_df(),
692
  interactive=False
693
  )
694
- suggestions = gr.Dataframe(
695
- label="Suggested portfolio from targets",
696
- headers=SUG_COLS,
697
- datatype=["str", "number"],
698
- col_count=(len(SUG_COLS), "fixed"),
699
- value=empty_suggest_df(),
700
- interactive=False
701
- )
702
- dl = gr.File(label="Session CSV path", value=None, visible=True)
703
 
704
  def do_search(query):
705
  note, options = search_tickers_cb(query)
@@ -712,8 +590,8 @@ with gr.Blocks(title="Efficient Portfolio Advisor") as demo:
712
 
713
  run_btn.click(
714
  fn=compute,
715
- inputs=[lookback, table, target_mu, target_sigma, use_synth],
716
- outputs=[plot, summary, universe_msg, positions, suggestions, dl]
717
  )
718
 
719
  if __name__ == "__main__":
 
23
  DEFAULT_LOOKBACK_YEARS = 5
24
  MARKET_TICKER = "VOO"
25
 
26
+ # column schema (weights shown in percent in UI tables)
27
  POS_COLS = ["ticker", "amount_usd", "weight_%", "beta"]
 
28
 
29
  FRED_MAP = [
30
  (1, "DGS1"),
 
45
  def empty_positions_df():
46
  return pd.DataFrame(columns=POS_COLS)
47
 
 
 
 
48
  def fred_series_for_horizon(years: float) -> str:
49
  y = max(1.0, min(100.0, float(years)))
50
  for cutoff, code in FRED_MAP:
 
53
  return "DGS30"
54
 
55
  def fetch_fred_yield_annual(code: str) -> float:
 
56
  url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?id={code}"
57
  try:
58
  r = requests.get(url, timeout=10)
 
64
  return 0.03
65
 
66
  def fetch_prices_monthly(tickers: List[str], years: int) -> pd.DataFrame:
 
67
  start = pd.Timestamp.today(tz="UTC") - pd.DateOffset(years=years, days=7)
68
  end = pd.Timestamp.today(tz="UTC")
69
  df = yf.download(
 
76
  )["Close"]
77
  if isinstance(df, pd.Series):
78
  df = df.to_frame()
79
+ # yfinance sometimes returns MultiIndex columns
80
  if isinstance(df.columns, pd.MultiIndex):
81
  df.columns = [c[-1] if isinstance(c, tuple) else str(c) for c in df.columns]
82
  else:
 
87
  def monthly_returns(prices: pd.DataFrame) -> pd.DataFrame:
88
  return prices.pct_change().dropna()
89
 
90
+ def annualize_mean(m): return np.asarray(m, dtype=float) * 12.0
91
+ def annualize_sigma(s): return np.asarray(s, dtype=float) * math.sqrt(12.0)
 
 
 
92
 
93
  def yahoo_search(query: str):
 
94
  if not query or len(query.strip()) == 0:
95
  return []
96
  url = "https://query1.finance.yahoo.com/v1/finance/search"
 
123
  # -------------- aligned moments --------------
124
  def get_aligned_monthly_returns(symbols: List[str], years: int) -> pd.DataFrame:
125
  uniq = [c for c in dict.fromkeys(symbols)]
 
126
  if MARKET_TICKER not in uniq:
127
  uniq = uniq + [MARKET_TICKER]
128
  px = fetch_prices_monthly(uniq, years)
129
  rets = monthly_returns(px)
130
  cols = [c for c in uniq if c in rets.columns]
131
  R = rets[cols].dropna(how="any")
132
+ return R.loc[:, ~R.columns.duplicated()]
 
 
133
 
134
  def estimate_all_moments_aligned(symbols: List[str], years: int, rf_ann: float):
135
  R = get_aligned_monthly_returns(symbols, years)
 
157
  elif ex_s is not None:
158
  betas[s] = float(np.cov(ex_s.values, ex_m.values, ddof=1)[0, 1] / var_m)
159
 
160
+ # include market in covariance so risk is measured correctly when VOO is held
161
  asset_cols = list(R.columns)
162
  cov_m = np.cov(R[asset_cols].values.T, ddof=1) if asset_cols else np.zeros((0, 0))
163
  covA = pd.DataFrame(cov_m * 12.0, index=asset_cols, columns=asset_cols)
 
219
  cml = rf_ann + slope * xs
220
  plt.plot(xs, cml, label="CML through VOO")
221
 
 
222
  plt.scatter([0.0], [rf_ann], label="Risk free")
223
  plt.scatter([sigma_mkt], [rf_ann + erp_ann], label="Market VOO")
224
  plt.scatter([pt_sigma], [pt_mu], label="Your portfolio")
 
227
  if targ_sigma is not None and targ_mu is not None:
228
  plt.scatter([targ_sigma], [targ_mu], label="Target suggestion")
229
 
230
+ # Guides + annotations (in percent)
231
+ plt.plot([pt_sigma, same_sigma_sigma], [pt_mu, same_sigma_mu],
232
+ linestyle="--", linewidth=1.2, alpha=0.7, color="gray")
 
 
 
 
 
 
 
233
  d_ret = (same_sigma_mu - pt_mu) * 100.0
234
+ plt.annotate(f"Return gain at same sigma {d_ret:+.2f}%",
235
+ xy=(same_sigma_sigma, same_sigma_mu),
236
+ xytext=(same_sigma_sigma + 0.02 * xmax, same_sigma_mu),
237
+ arrowprops=dict(arrowstyle="->", lw=1.0), fontsize=9, va="center")
 
 
 
 
238
 
239
+ plt.plot([pt_sigma, same_mu_sigma], [pt_mu, same_mu_mu],
240
+ linestyle="--", linewidth=1.2, alpha=0.7, color="gray")
 
 
 
 
 
 
 
241
  d_sig = (same_mu_sigma - pt_sigma) * 100.0
242
+ plt.annotate(f"Risk change at same return {d_sig:+.2f}%",
243
+ xy=(same_mu_sigma, same_mu_mu),
244
+ xytext=(same_mu_sigma, same_mu_mu + 0.03),
245
+ arrowprops=dict(arrowstyle="->", lw=1.0), fontsize=9, ha="center")
 
 
 
 
246
 
 
247
  ax = plt.gca()
248
  ax.yaxis.set_major_formatter(PercentFormatter(1.0))
249
  ax.xaxis.set_major_formatter(PercentFormatter(1.0))
 
258
  buf.seek(0)
259
  return Image.open(buf)
260
 
261
+ # -------------- synthetic dataset (for the optional predictor) --------------
262
  def synth_profile(seed: int) -> str:
263
  rng = np.random.default_rng(seed)
264
  risk = rng.choice(["cautious", "balanced", "moderate", "growth", "aggressive"])
 
294
  os.makedirs(os.path.dirname(path), exist_ok=True)
295
  df.to_csv(path, index=False)
296
 
 
297
  def _row_to_exposures(row: pd.Series, universe: List[str]) -> Optional[np.ndarray]:
298
  try:
299
  ts = [t.strip() for t in str(row["tickers"]).split(",")]
 
339
  er_hat, sigma_hat, beta_hat = float(yhat[0]), float(yhat[1]), float(yhat[2])
340
  return er_hat, sigma_hat, beta_hat
341
 
342
+ # -------------- summary --------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  def fmt_pct(x: float) -> str:
344
  return f"{x*100:.2f}%"
345
 
 
351
  parts.append(f"- Predicted annual return {fmt_pct(er_hat)} , difference {fmt_pct(dmu)}")
352
  parts.append(f"- Predicted annual volatility {fmt_pct(sigma_hat)} , difference {fmt_pct(dsig)}")
353
  parts.append(f"- Predicted beta {beta_hat:.2f} , difference {dbeta:+.02f}")
354
+ verdict = ("The synthetic model matches the historical calculation closely. "
355
+ "You can trust these quick predictions for similar mixes."
356
+ if (close_mu and close_sig and close_beta)
357
+ else "The synthetic model is not very close here. Rely more on the historical calculation for this mix.")
358
  return "\n".join(parts + ["", f"**Verdict** {verdict}"])
359
 
360
  def build_summary_md(lookback, horizon, rf, rf_code, erp, sigma_mkt,
361
  beta_p, er_p, sigma_p,
362
  a_sigma, b_sigma, mu_eff_sigma,
363
  a_mu, b_mu, sigma_eff_mu,
364
+ synth=None, synth_nrows: int = 0) -> str:
 
365
  lines = []
366
  lines.append("### Inputs")
367
  lines.append(f"- Lookback years {lookback}")
 
380
  lines.append("### Synthetic prediction from data/investor_profiles.csv")
381
  lines.append(f"- Samples used {synth_nrows}")
382
  lines.append(humanize_synth(er_hat, sigma_hat, beta_hat, dmu, dsig, dbeta))
 
 
 
 
 
 
 
 
383
  lines.append("")
384
  lines.append("### Efficient alternatives on CML")
385
  lines.append("Efficient same sigma")
 
446
  RF_ANN = rf
447
  return f"Risk free series {code}. Latest annual rate {rf:.2%}. Dataset will use this rate on compute."
448
 
449
+ def compute(years_lookback: int, table: pd.DataFrame, use_synth: bool):
 
 
450
  df = table.dropna()
451
  df["ticker"] = df["ticker"].astype(str).str.upper().str.strip()
452
  df["amount_usd"] = pd.to_numeric(df["amount_usd"], errors="coerce").fillna(0.0)
453
 
454
  symbols = [t for t in df["ticker"].tolist() if t]
455
  if len(symbols) == 0:
456
+ return None, "Add at least one ticker", "Universe empty", empty_positions_df(), None
457
 
458
  symbols = validate_tickers(symbols, years_lookback)
459
  if len(symbols) == 0:
460
+ return None, "Could not validate any tickers", "Universe invalid", empty_positions_df(), None
461
 
462
  global UNIVERSE
463
  UNIVERSE = list(sorted(set([s for s in symbols if s != MARKET_TICKER] + [MARKET_TICKER])))[:MAX_TICKERS]
 
471
 
472
  gross = sum(abs(v) for v in amounts.values())
473
  if gross == 0:
474
+ return None, "All amounts are zero", "Universe ok", empty_positions_df(), None
475
  weights = {k: v / gross for k, v in amounts.items()}
476
 
477
  beta_p, er_p, sigma_p = portfolio_stats(weights, covA, betas, rf_ann, erp_ann)
 
479
  a_sigma, b_sigma, mu_eff_sigma = efficient_same_sigma(sigma_p, rf_ann, erp_ann, sigma_mkt)
480
  a_mu, b_mu, sigma_eff_mu = efficient_same_return(er_p, rf_ann, erp_ann, sigma_mkt)
481
 
482
+ # ensure synthetic dataset exists once (for predictor only)
483
  if not os.path.exists(DATASET_PATH):
484
  synth_df = build_synthetic_dataset(
485
  universe=list(sorted(set(symbols + [MARKET_TICKER]))),
 
503
  er_hat - er_p, sigma_hat - sigma_p, beta_hat - beta_p
504
  )
505
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506
  img = plot_cml(
507
  rf_ann, erp_ann, sigma_mkt,
508
  sigma_p, er_p,
509
  sigma_p, mu_eff_sigma,
510
  sigma_eff_mu, er_p,
511
+ targ_sigma=None, targ_mu=None
512
  )
513
 
514
  info = build_summary_md(
 
516
  beta_p, er_p, sigma_p,
517
  a_sigma, b_sigma, mu_eff_sigma,
518
  a_mu, b_mu, sigma_eff_mu,
519
+ synth=synth_tuple, synth_nrows=nrows
 
520
  )
521
 
522
  rows = []
 
531
  pos_table = pd.DataFrame(rows, columns=POS_COLS)
532
 
533
  uni_msg = f"Universe set to {', '.join(UNIVERSE)}"
534
+ return img, info, uni_msg, pos_table, csv_path
535
 
536
  # -------------- UI --------------
537
  ensure_data_dir()
 
562
  horizon = gr.Number(label="Horizon in years (1–100)", value=5, precision=0)
563
  lookback = gr.Slider(1, 10, value=DEFAULT_LOOKBACK_YEARS, step=1, label="Lookback years for beta & sigma")
564
 
565
+ use_synth = gr.Checkbox(label="Use synthetic predictor (fast check)", value=True)
 
 
 
566
 
567
  run_btn = gr.Button("Compute and suggest")
568
  with gr.Column(scale=1):
 
577
  value=empty_positions_df(),
578
  interactive=False
579
  )
580
+ dl = gr.File(label="Session CSV path (synthetic predictor data)", value=None, visible=True)
 
 
 
 
 
 
 
 
581
 
582
  def do_search(query):
583
  note, options = search_tickers_cb(query)
 
590
 
591
  run_btn.click(
592
  fn=compute,
593
+ inputs=[lookback, table, use_synth],
594
+ outputs=[plot, summary, universe_msg, positions, dl]
595
  )
596
 
597
  if __name__ == "__main__":