# Hugging Face page artifact (non-Python residue, preserved as comments):
# Jay1121's picture
# Update app.py
# b22336b verified
# -*- coding: utf-8 -*-
# app.py โ€” Bayesian Journey Dashboard (Colab-friendly, robust Excel + plots)
# Fixes:
# - โœ… ์ •๊ทœํ™” ์œ ํ‹ธ(_as_all, _ensure_key_cols ๋“ฑ) ํฌํ•จ
# - โœ… pick_row_for ํฌํ•จ
# - โœ… Plotly ์ถ• ๊ทธ๋ฆฌ๋“œ ์†์„ฑ ์ •๋ฆฌ(์œ ํšจํ•˜์ง€ ์•Š์€ prop ์ œ๊ฑฐ)
# - โœ… ํฌํŠธ ์ถฉ๋Œ ์‹œ ์ž๋™ ๋Œ€์ฒด ํฌํŠธ๋กœ ์žฌ์‹œ๋„
import os, json, re, traceback
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, html, dcc, dash_table, Input, Output, State
from dash.dash_table import FormatTemplate
from dash.dash_table.Format import Format, Scheme
import dash # (NEW) ์ธํ„ฐ๋ž™์…˜ ๋กœ๊ทธ์šฉ
# (ํŒŒ์ผ ์ƒ๋‹จ import ๊ทผ์ฒ˜์— ์ถ”๊ฐ€)
import io
import hashlib
# Salt mixed into the per-(segment, model, loyalty) hash used for flow scaling.
# Override via the FLOW_SALT environment variable if needed.
# (The original assigned FLOW_SALT twice with identical values; duplicate removed.)
FLOW_SALT = os.getenv("FLOW_SALT", "phi-v1-2025-01")
FLOW_GLOBAL = True  # True: use the fixed global scale; False: derive a hash-based scale per key
GLOBAL_K = 11.3     # global flow-scale constant used when FLOW_GLOBAL is True
def _flow_scale(seg, mod, loy):
    """Return the flow-scale factor for a (segment, model, loyalty) key.

    With FLOW_GLOBAL set, a single global constant is returned; otherwise a
    deterministic pseudo-random scale in [7.5, 18.5) is derived from a salted
    SHA-256 hash of the key.
    """
    if FLOW_GLOBAL:
        return GLOBAL_K
    digest = hashlib.sha256(f"{seg}|{mod}|{loy}|{FLOW_SALT}".encode("utf-8")).hexdigest()
    bucket = int(digest[:8], 16) % 1100
    return 7.5 + bucket / 100.0
# ======== ์ธํ„ฐ๋ž™์…˜ ๊ณต์šฉ ์„ค์ • ========
GRAPH_CONFIG = {
"displayModeBar": True,
"scrollZoom": True, # ํœ ๋กœ ์คŒ
"doubleClick": "reset", # ๋”๋ธ”ํด๋ฆญ ๋ฆฌ์…‹
"modeBarButtonsToAdd": ["lasso2d", "select2d"],
"showTips": True,
}
# ===================== ๊ธฐ๋ณธ ๊ฒฝ๋กœ =====================
from pathlib import Path
ROOT = Path(__file__).resolve().parent
DEFAULT_PATH = os.getenv("EXCEL_PATH", str(ROOT / "bayesian_analysis_total_v1.xlsx"))
EXCEL_PATH = DEFAULT_PATH
# (load_excel ํ˜ธ์ถœ์€ DEFAULT_PATH ๊ทธ๋Œ€๋กœ์—ฌ๋„ ๋™์ž‘, ๋ช…์‹œํ•˜๋ ค๋ฉด EXCEL_PATH๋กœ)
# ===================== ๋ ˆ๋ฒจ ์ƒ์ˆ˜ =====================
LEVEL_OVERALL="์ „์ฒด"; LEVEL_SEGMENT="์„ธ๊ทธ๋จผํŠธ"; LEVEL_MODEL="๋ชจ๋ธ"
LEVEL_LOYALTY="์ถฉ์„ฑ๋„"; LEVEL_SEG_X_LOY="์„ธ๊ทธร—์ถฉ์„ฑ๋„"
LEVEL_SEG_X_MODEL="์„ธ๊ทธร—๋ชจ๋ธ"; LEVEL_MODEL_X_LOY="๋ชจ๋ธร—์ถฉ์„ฑ๋„"
LEVEL_MOD_X_SEG_X_LOY="๋ชจ๋ธร—์„ธ๊ทธร—์ถฉ์„ฑ๋„"
# === ์ •๊ทœํ™” ===
ALL_ALIASES = {"ALL","all","All","", " ", " ", "์ „์ฒด", "NONE","None","none","nan","NaN", None}
LVL_ALIASES = {
"๋ชจ๋ธ์ „์ฒดร—์„ธ๊ทธร—์ถฉ์„ฑ๋„": "๋ชจ๋ธร—์„ธ๊ทธร—์ถฉ์„ฑ๋„",
"์„ธ๊ทธx๋ชจ๋ธ": "์„ธ๊ทธร—๋ชจ๋ธ",
"๋ชจ๋ธx์ถฉ์„ฑ๋„": "๋ชจ๋ธร—์ถฉ์„ฑ๋„",
"์„ธ๊ทธx์ถฉ์„ฑ๋„": "์„ธ๊ทธร—์ถฉ์„ฑ๋„",
}
def _as_all(v) -> str:
    """Normalize a selector value: any ALL-alias (None, blank, '전체', …) becomes "ALL"."""
    text = "ALL" if v is None else str(v).strip()
    if text in ALL_ALIASES:
        return "ALL"
    return text
def _ensure_key_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Guarantee the key columns (analysis_level/segment/model/loyalty/level)
    exist and are normalized: blank/None-like noise becomes "ALL", and legacy
    level names are mapped to canonical labels via LVL_ALIASES."""
    df = df.copy()
    for c in ["analysis_level","segment","model","loyalty"]:
        if c not in df.columns:
            df[c] = "ALL"
        # Normalize string noise ("", "전체", "None", "nan", case variants) to "ALL".
        df[c] = (
            df[c].astype(str).str.strip()
            .replace({
                "": "ALL", "전체":"ALL",
                "NONE":"ALL","None":"ALL","none":"ALL",
                "nan":"ALL","NaN":"ALL",
                "ALL":"ALL","All":"ALL","all":"ALL"
            })
            .fillna("ALL")  # defensive: astype(str) leaves no NaN, kept for safety
        )
    if "level" not in df.columns:
        # Derive the display level from analysis_level when absent.
        df["level"] = df["analysis_level"] if "analysis_level" in df.columns else "전체"
    df["level"] = (
        df["level"].astype(str).str.strip()
        .replace({"ALL":"전체","All":"전체","all":"전체"})
        .replace(LVL_ALIASES)  # canonicalize legacy level names
    )
    if "analysis_level" in df.columns:
        df["analysis_level"] = df["analysis_level"].replace(LVL_ALIASES)
    return df
# ---- Store JSON ๋กœ๋” & ์Šค์™‘ ๊ฐ์ง€ ์œ ํ‹ธ ----
def _looks_split_df_json(s: str) -> bool:
try:
o = json.loads(s)
# orient="split"๋Š” ์ตœ์†Œ columns/index/data 3์…‹์ด ์žˆ์Œ
return isinstance(o, dict) and {"columns","index","data"}.issubset(set(o.keys()))
except Exception:
return False
def _looks_overall_json(s: str) -> bool:
try:
o = json.loads(s)
return isinstance(o, dict) and any(k in o for k in ("pref_mean","rec_mean","intent_mean","buy_mean"))
except Exception:
return False
def _safe_read_df_split(js: str | dict | None) -> pd.DataFrame:
if js is None:
return pd.DataFrame()
if isinstance(js, dict): # ์ด๋ฏธ ํŒŒ์‹ฑ๋œ ๊ฒฝ์šฐ
# dict๊ฐ€ split ์Šคํ‚ค๋งˆ์ธ ๊ฒฝ์šฐ๋งŒ ์ฒ˜๋ฆฌ
if {"columns","index","data"}.issubset(set(js.keys())):
return pd.read_json(io.StringIO(json.dumps(js)), orient="split")
return pd.DataFrame()
# str
try:
return pd.read_json(io.StringIO(js), orient="split")
except Exception:
return pd.DataFrame()
def _safe_read_overall(js: str | dict | None) -> dict:
if js is None:
return {}
if isinstance(js, dict):
return js
try:
o = json.loads(js)
return o if isinstance(o, dict) else {}
except Exception:
return {}
def _maybe_swap_sankey_overall(js_sankey, js_overall):
    """Detect and undo a swapped (sankey, overall) cache pair.

    When js_sankey looks like an overall metrics dict while js_overall looks
    like a split-DataFrame JSON, the two payloads were stored in each other's
    slot; return them swapped back, plus a flag telling whether a swap happened.
    """
    try:
        sankey_is_overall = isinstance(js_sankey, str) and _looks_overall_json(js_sankey)
        overall_is_df = isinstance(js_overall, str) and _looks_split_df_json(js_overall)
        if sankey_is_overall and overall_is_df:
            return js_overall, js_sankey, True  # (corrected sankey, overall, swapped?)
    except Exception:
        pass
    return js_sankey, js_overall, False
# Backwards-compatible aliases for the store readers above.
_read_df_store = _safe_read_df_split
_read_overall = _safe_read_overall
def _rebuild_hkey_using_level(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize key columns and rebuild "hierarchy_key" as
    "level|segment|model|loyalty" so rows can be matched across sheets."""
    df = _ensure_key_cols(df).copy()
    # Prefer an existing non-empty "level"; fall back to analysis_level, then "전체".
    if "level" in df.columns and df["level"].notna().any():
        pass
    elif "analysis_level" in df.columns:
        df["level"] = df["analysis_level"]
    else:
        df["level"] = "전체"
    for c in ["level","segment","model","loyalty"]:
        if c != "level":  # "level" keeps its display value; only the key dims are "ALL"-normalized
            df[c] = (
                df[c].astype(str).str.strip()
                .replace({"": "ALL","전체":"ALL","NONE":"ALL","None":"ALL","none":"ALL","nan":"ALL","NaN":"ALL"})
                .fillna("ALL")
            )
    df["level"] = df["level"].replace(LVL_ALIASES)  # canonicalize legacy level names
    df["hierarchy_key"] = df["level"] + "|" + df["segment"] + "|" + df["model"] + "|" + df["loyalty"]
    return df
def sample_col_in_df(df) -> str | None:
for c in ["pref_sample_size","sample_size","n","N","base","๋ฒ ์ด์Šค์ˆ˜","ํ‘œ๋ณธ์ˆ˜"]:
if c in df.columns: return c
return None
# ==== ๋น„๊ณต๊ฐœ ์œ ๋Ÿ‰ ์Šค์ผ€์ผ ====
FLOW_GLOBAL = True
GLOBAL_K = 11.3
# ==== ๊ณต์šฉ: Shape-safe helpers (๊ฐ€์งœ ํ‚ค ์ž๋™ ์ฐจ๋‹จ + ๋ณด์ •) ====
_ALLOWED_SHAPE_KEYS = {
"editable","fillcolor","fillrule","label","layer","legend","legendgroup","legendgrouptitle",
"legendrank","legendwidth","line","name","opacity","path","showlegend","templateitemname",
"type","visible","x0","x1","xanchor","xref","xsizemode","y0","y1","yanchor","yref","ysizemode",
}
# ์—ฌ๊ธฐ๊ฐ€ ๋ฌธ์ œ์˜ ๊ฐ€์งœ ํ‚ค๋“ค
_SHIFT_KEYS = ("x0shift", "x1shift", "y0shift", "y1shift")
def _line_from_kwargs(kwargs: dict):
line = {}
if "line_color" in kwargs: line["color"] = kwargs.pop("line_color")
if "line_width" in kwargs: line["width"] = kwargs.pop("line_width")
if "line_dash" in kwargs: line["dash"] = kwargs.pop("line_dash")
return {k: v for k, v in line.items() if v is not None}
def _clean_shape_kwargs(kwargs: dict):
    """Sanitize shape kwargs:

    1) drop the bogus *_shift keys,
    2) merge line_* shorthands into the ``line`` sub-dict,
    3) keep only valid shape properties with non-None values.
    """
    cleaned = dict(kwargs)  # shallow copy; caller's dict is left untouched
    # 1) remove every bogus shift key
    for bogus in _SHIFT_KEYS:
        cleaned.pop(bogus, None)
    # 2) fold line_* shorthands into the line sub-dict
    line = _line_from_kwargs(cleaned)
    if line:
        merged = dict(cleaned.get("line") or {})
        merged.update(line)
        cleaned["line"] = merged
    # 3) allow-list filter
    return {key: val for key, val in cleaned.items()
            if key in _ALLOWED_SHAPE_KEYS and val is not None}
def add_vline_safe(fig, x, **kwargs):
    """Add a vertical reference line at *x* (bogus keys blocked, line_* merged)."""
    shape = dict(
        type="line", xref="x", x0=float(x), x1=float(x),
        yref="paper", y0=0, y1=1,
        layer=kwargs.pop("layer", "above"),
    )
    if kwargs.get("opacity") is not None:
        shape["opacity"] = kwargs.pop("opacity")
    shape.update(_clean_shape_kwargs(kwargs))
    return fig.add_shape(**shape)
def add_hline_safe(fig, y, **kwargs):
    """Add a horizontal reference line at *y* (bogus keys blocked, line_* merged)."""
    shape = dict(
        type="line", yref="y", y0=float(y), y1=float(y),
        xref="paper", x0=0, x1=1,
        layer=kwargs.pop("layer", "above"),
    )
    if kwargs.get("opacity") is not None:
        shape["opacity"] = kwargs.pop("opacity")
    shape.update(_clean_shape_kwargs(kwargs))
    return fig.add_shape(**shape)
def _pad_top(fig, px=40):
    """Increase only the top margin of *fig* by *px*, preserving the others.

    NOTE(review): the parameter name shadows the module-level plotly.express
    alias ``px`` — harmless here since the module is not used in this function.
    """
    # Keep the existing margin; fig.layout.margin is a plotly object (attribute
    # access via getattr) or None when unset.
    m = fig.layout.margin or {}
    fig.update_layout(margin=dict(
        l=int(getattr(m, "l", 10) or 10),
        r=int(getattr(m, "r", 10) or 10),
        b=int(getattr(m, "b", 10) or 10),
        t=int(getattr(m, "t", 0) or 0) + int(px),
    ))
    return fig
def add_vrect_safe(fig, x0, x1, **kwargs):
    """
    Replacement for add_vrect: folds x0shift/x1shift into the coordinates and
    removes them, then sanitizes the remaining keys and adds a rect shape.
    """
    # ── fold the shift offsets into the coordinates ──
    dx0 = float(kwargs.pop("x0shift", 0) or 0)
    dx1 = float(kwargs.pop("x1shift", 0) or 0)
    x0 = float(x0) + dx0
    x1 = float(x1) + dx1
    # Auto-detect yref (an explicitly passed value is respected).
    yref = kwargs.pop("yref", None)
    has_y = ("y0" in kwargs) or ("y1" in kwargs)
    if yref is None:
        yref = "y" if has_y else "paper"
    # Default bounds for paper coordinates.
    y0_default, y1_default = (0, 1) if yref == "paper" else (None, None)
    base = dict(
        type="rect", xref="x", x0=x0, x1=x1,
        yref=yref, y0=kwargs.pop("y0", y0_default), y1=kwargs.pop("y1", y1_default),
        layer=kwargs.pop("layer", "below"),
        fillcolor=kwargs.pop("fillcolor", "rgba(0,0,0,0.06)"),
    )
    if base["yref"] == "y":
        # On a data axis, drop y0/y1 entries that are still None.
        if base.get("y0") is None: base.pop("y0", None)
        if base.get("y1") is None: base.pop("y1", None)
    if "opacity" in kwargs and kwargs["opacity"] is not None:
        base["opacity"] = kwargs.pop("opacity")
    base.update(_clean_shape_kwargs(kwargs))
    return fig.add_shape(**base)
# (์„ ํƒ) ๋งŒ์•ฝ ์–ด๋”˜๊ฐ€์—์„œ layout.shapes์— ์ง์ ‘ dict๋ฅผ ๋„ฃ๋Š”๋‹ค๋ฉด:
def sanitize_shape_dict(d: dict) -> dict:
    """Safely sanitize an external/legacy shape dict.

    - fold x0shift/x1shift/y0shift/y1shift into the matching coordinate, then drop them
    - merge line_* shorthands into the ``line`` sub-dict
    - remove any key that is not a valid shape property
    """
    out = dict(d or {})
    # 1) fold shift offsets into coordinates
    shift_pairs = (("x0shift", "x0"), ("x1shift", "x1"), ("y0shift", "y0"), ("y1shift", "y1"))
    for shift_key, coord_key in shift_pairs:
        if shift_key not in out:
            continue
        try:
            if out.get(coord_key) is not None:
                out[coord_key] = float(out[coord_key]) + float(out.pop(shift_key) or 0.0)
            else:
                out.pop(shift_key, None)
        except Exception:
            out.pop(shift_key, None)
    # 2) line_* -> line merge
    line_opts = {}
    for src, dst in (("line_color", "color"), ("line_width", "width"), ("line_dash", "dash")):
        if src in out:
            line_opts[dst] = out.pop(src)
    if line_opts:
        merged = dict(out.get("line") or {})
        merged.update({k: v for k, v in line_opts.items() if v is not None})
        out["line"] = merged
    # 3) allow-list filter
    return {k: v for k, v in out.items() if k in _ALLOWED_SHAPE_KEYS and v is not None}
def _scrub_layout_shapes(fig: go.Figure) -> go.Figure:
    """Strip invalid residue keys (e.g. x0shift) from every entry in fig.layout.shapes."""
    try:
        existing = list(fig.layout.shapes) if fig.layout.shapes is not None else []
        scrubbed = []
        for shape in existing:
            try:
                as_dict = shape.to_plotly_json() if hasattr(shape, "to_plotly_json") else dict(shape)
                scrubbed.append(sanitize_shape_dict(as_dict))  # reuse the existing sanitizer
            except Exception:
                # Skip anything unreadable rather than break the figure.
                continue
        fig.update_layout(shapes=scrubbed)
    except Exception:
        pass
    return fig
def sanitize_fig_shapes(fig):
    """Sanitize every entry of fig.layout.shapes via sanitize_shape_dict."""
    try:
        current = list(fig.layout.shapes) if fig.layout.shapes else []
    except Exception:
        current = []
    if not current:
        return fig
    sanitized = []
    for shape in current:
        try:
            payload = shape.to_plotly_json() if hasattr(shape, "to_plotly_json") else dict(shape)
            sanitized.append(sanitize_shape_dict(payload))
        except Exception:
            # Drop broken entries.
            pass
    fig.update_layout(shapes=sanitized)
    return fig
# ===================== ํŒ”๋ ˆํŠธ =====================
COL_RED = "#C32C2C" # ๋นจ๊ฐ•
COL_ORANGE = "#D24D3E" # ์ฃผํ™ฉ
COL_YELLOW = "#DE937A" # ๋…ธ๋ž‘
COL_BEIGE = "#D49442" # ๋ฒ ์ด์ง€
COL_GREEN_LITE = "#2B8E81" # ์ดˆ๋ก(๊ธฐ๋ณธ)
COL_GREEN_DARK = "#21786E" # ์ดˆ๋ก ์ง„ํ•œํ†ค(ํ•„์š”์‹œ)
COL_GRAY = "#D3D3D3"
def _hex_to_rgb_tuple(h): # ์œ ํ‹ธ
h = h.lstrip("#")
return [int(h[i:i+2], 16) for i in (0,2,4)]
def royg_color_for(values: np.ndarray) -> list:
    """Map numeric values onto a green→beige→yellow→orange→red gradient.

    Values are min-max scaled; low maps to green ("good"), high to red
    ("bad"). An all-non-finite input yields dark green for every entry, and
    individually non-finite entries fall back to a neutral gray.
    """
    v = np.asarray(values, dtype=float)
    if v.size == 0: return []
    if not np.isfinite(v).any():
        return [COL_GREEN_DARK] * len(v)
    lo = np.nanmin(v); hi = np.nanmax(v)
    # Degenerate or non-finite range collapses everything to t=0.
    t = np.zeros_like(v) if (not np.isfinite(lo) or not np.isfinite(hi) or hi-lo < 1e-12) else (v-lo)/(hi-lo)
    # Low value (good) -> high value (bad): green -> beige -> yellow -> orange -> red.
    cols = np.array([
        _hex_to_rgb_tuple(COL_GREEN_LITE),
        _hex_to_rgb_tuple(COL_BEIGE),
        _hex_to_rgb_tuple(COL_YELLOW),
        _hex_to_rgb_tuple(COL_ORANGE),
        _hex_to_rgb_tuple(COL_RED),
    ], dtype=float)
    stops = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    # Piecewise-linear interpolation per RGB channel.
    r = np.interp(t, stops, cols[:,0]); g = np.interp(t, stops, cols[:,1]); b = np.interp(t, stops, cols[:,2])
    out = []
    for rr, gg, bb in zip(r,g,b):
        if not (np.isfinite(rr) and np.isfinite(gg) and np.isfinite(bb)):
            out.append('rgb(140,140,140)')  # neutral gray for NaN-propagated entries
        else:
            out.append(f'rgb({int(round(rr))},{int(round(gg))},{int(round(bb))})')
    return out
# ==== DESIGN CONSTANTS (tiers & neutrals) ====
COL_BLUE_DEEP = "#1E3A8A" # ์ง„ํŒŒ๋ž‘(ํ•˜์ด์—”๋“œ)
COL_BLUE_SKY = "#60A5FA" # ํ•˜๋Š˜(๋ฏธ๋“œ)
COL_GRAY_MED = "#9CA3AF" # ํšŒ์ƒ‰(๋กœ์šฐ/์ค‘๋ฆฝ)
COL_BLACK = "#111111" # ํฌ๋ ˆ์ŠคํŠธ ํ”Œ๋กฏ์šฉ
# ์„ธ๊ทธ/ํ‹ฐ์–ด โ†’ ์ƒ‰ ๋งคํ•‘ (๋ชจ๋“  ํ‚ค๋Š” ์†Œ๋ฌธ์ž ๊ธฐ์ค€์œผ๋กœ ์ €์žฅ)
_SEG_TIER_COLOR = {
# High/Premium ๊ณ„์—ด
"highend": COL_BLUE_DEEP, "high": COL_BLUE_DEEP, "premium": COL_BLUE_DEEP,
"ํ•˜์ด์—”๋“œ": COL_BLUE_DEEP, "ํ”„๋ฆฌ๋ฏธ์—„": COL_BLUE_DEEP,
# Mid ๊ณ„์—ด
"midend": COL_BLUE_SKY, "mid": COL_BLUE_SKY, "midrange": COL_BLUE_SKY,
"๋ฏธ๋“œ": COL_BLUE_SKY, "์ค‘๊ฐ„": COL_BLUE_SKY,
# Low/Entry ๊ณ„์—ด
"lowend": COL_GRAY_MED, "low": COL_GRAY_MED, "entry": COL_GRAY_MED,
"๋กœ์šฐ์—”๋“œ": COL_GRAY_MED, "์ €๊ฐ€": COL_GRAY_MED,
}
def _norm_key(x) -> str:
return "" if x is None else str(x).strip().lower()
def _tier_color_for_segment(seg: str) -> str:
    """Map a loosely-spelled segment name (case/whitespace/Korean tolerated) to its tier color."""
    normalized = _norm_key(seg)
    return _SEG_TIER_COLOR.get(normalized, COL_GRAY_MED)
def _model_dominant_segment(df_scope: pd.DataFrame) -> dict:
"""
๋ชจ๋ธ๋ณ„ 'ํ‘œ๋ณธ์ˆ˜ ๊ฐ€์ค‘' ์šฐ์„ธ ์„ธ๊ทธ. segment๊ฐ€ ALL/์ „์ฒด์ธ ํ–‰์€ ์ œ์™ธ.
๋ฐ˜ํ™˜: {model(str): segment(str)}
"""
if df_scope is None or df_scope.empty or "model" not in df_scope.columns or "segment" not in df_scope.columns:
return {}
s = df_scope.copy()
# ALL/์ „์ฒด drop
seg_norm = s["segment"].astype(str).str.strip()
m_valid = ~seg_norm.isin(["ALL", "์ „์ฒด"]) & seg_norm.notna()
s = s[m_valid]
if s.empty:
return {}
w = pd.to_numeric(s.get("pref_sample_size", 1), errors="coerce").replace([np.inf, -np.inf], np.nan).fillna(1.0)
s["__w__"] = w
grp = s.groupby(["model", "segment"], as_index=False)["__w__"].sum()
# ๊ฐ ๋ชจ๋ธ์—์„œ ๊ฐ€์ค‘์น˜ ์ตœ๋Œ€์ธ ์„ธ๊ทธ 1๊ฐœ ์„ ํƒ
dom = grp.sort_values(["model", "__w__"], ascending=[True, False]).drop_duplicates("model")
return {str(r["model"]): str(r["segment"]) for _, r in dom.iterrows()}
# ===================== ์•ฑ =====================
app = Dash(__name__)
app.title = "Bayesian Journey Dashboard"
px.defaults.template = "plotly_white"
def _safe_num(x, default=np.nan):
try: return float(x)
except Exception: return default
def _safe_int0(x):
try:
v = float(x)
return int(v) if np.isfinite(v) else 0
except Exception:
return 0
def _norm_cols(df: pd.DataFrame) -> pd.DataFrame:
if df is None or df.empty: return pd.DataFrame()
df = df.copy()
df.columns = [str(c).strip() for c in df.columns]
for c in df.columns:
if df[c].dtype == "O":
ser = pd.to_numeric(df[c], errors="coerce")
if ser.notna().mean() >= 0.5: df[c] = ser
return df
def _ci_to_sd(lo, hi):
lo = np.asarray(lo, dtype=float); hi = np.asarray(hi, dtype=float)
return (hi - lo)/(2*1.96)
def _grade_from_p(p):
if not np.isfinite(p): return "N/A"
if p >= 0.70: return "A"
if p >= 0.55: return "B"
if p >= 0.45: return "C"
return "D"
def _auto_dtick(span):
# 0~1 ํผ์„ผํŠธ ์ถ• span ๊ธฐ์ค€
if span <= 0.30: return 0.05 # 5%
if span >= 0.80: return 0.20 # 20%
return 0.10 # 10%
def apply_dense_grid(fig: go.Figure, x_prob: bool = False, y_prob: bool = False) -> go.Figure:
    """Apply the shared dense-grid styling to *fig*.

    - preserves an existing height (defaults to 360 when unset)
    - white background, tight margins, legend on
    - optional percent formatting / auto tick spacing for a 0–1 x and/or y axis
    - keeps interaction state (uirevision) and scrubs residual bogus shape
      keys (x0shift etc.) from the layout

    BUGFIX: the original had a second, unreachable block after ``return fig``
    (a try/except calling sanitize_fig_shapes followed by another return);
    _scrub_layout_shapes below already performs that sanitization, so the dead
    code was removed without changing behavior.
    """
    # 1) Preserve existing height (only set 360 when absent).
    cur_h = getattr(fig.layout, "height", None)
    fig.update_layout(
        height=(cur_h if cur_h is not None else 360),
        showlegend=True,
        paper_bgcolor="#fff",
        plot_bgcolor="#fff",
        font=dict(color="#111"),
        margin=dict(l=10, r=10, t=30, b=10),
    )
    # 2) Base grid: no axis lines or mirroring.
    fig.update_xaxes(showline=False, mirror=False, linewidth=0)
    fig.update_yaxes(showline=False, mirror=False, linewidth=0)
    # 3) "minor" axis options only exist on newer plotly versions.
    try:
        fig.update_xaxes(minor=dict(showgrid=False))
        fig.update_yaxes(minor=dict(showgrid=False))
    except Exception:
        pass
    # 4) Probability (0–1) axis formatting.
    if x_prob:
        xr = (getattr(fig.layout.xaxis, "range", None) or [0, 1])
        span = (xr[1] - xr[0]) if isinstance(xr, (list, tuple)) and len(xr) == 2 else 1.0
        fig.update_xaxes(tick0=0, dtick=_auto_dtick(span), tickformat=".0%")
    if y_prob:
        yr = (getattr(fig.layout.yaxis, "range", None) or [0, 1])
        span = (yr[1] - yr[0]) if isinstance(yr, (list, tuple)) and len(yr) == 2 else 1.0
        fig.update_yaxes(tick0=0, dtick=_auto_dtick(span), tickformat=".0%")
    # 5) Keep interaction state across updates.
    fig.update_layout(uirevision="keep")
    # 6) Scrub residual layout-shape keys (x0shift etc.) globally.
    try:
        fig = _scrub_layout_shapes(fig)  # uses sanitize_shape_dict internally
    except Exception:
        pass
    return fig
# ---- Excel ์˜คํ”ˆ(์—”์ง„ ํด๋ฐฑ + ๋””๋ฒ„๊ทธ ์ˆ˜์ง‘) ----
def _open_excel_with_fallback(path: str):
errs = []
for eng in ["openpyxl", None, "xlrd"]:
try:
xls = pd.ExcelFile(path, engine=eng) if eng else pd.ExcelFile(path)
return xls, (eng or "auto")
except Exception as e:
errs.append(f"{(eng or 'auto')}: {type(e).__name__}::{e}")
raise RuntimeError("Excel open failed | " + " | ".join(errs))
def _find_sheet(xls: pd.ExcelFile, candidates):
names = xls.sheet_names
norm = lambda s: re.sub(r"\s+", "", str(s)).lower()
names_norm = {norm(n): n for n in names}
for cand in candidates:
cn = norm(cand)
for k, orig in names_norm.items():
if cn in k:
return orig
return None
def load_excel(path: str):
    """Load the analysis workbook.

    Returns (df_master, df_tm, df_sankey, overall, seg_opts, mod_opts_all,
    loy_opts, dbg): the normalized master / transition-matrix / sankey frames,
    overall mean metrics, the dropdown option lists, and a debug dict (engine
    used, sheet names, which sheet matched which role).

    Raises FileNotFoundError when *path* does not exist and ValueError when
    the required master sheet cannot be found.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"엑셀 파일이 없습니다: {path}")
    xls, used_engine = _open_excel_with_fallback(path)
    sheets = list(xls.sheet_names)
    # Fuzzy sheet matching: master is mandatory, transition/sankey optional.
    sh_master = _find_sheet(xls, ["VBA마스터테이블", "마스터", "master", "mastertable", "마스터테이블"])
    sh_tm = _find_sheet(xls, ["베이지안전이확률매트릭스", "전이확률", "transition", "matrix"])
    sh_sankey = _find_sheet(xls, ["베이지안상키다이어그램", "상키", "sankey", "flow"])
    dbg = {"engine": used_engine, "sheets": sheets,
           "matched": {"master": sh_master, "tm": sh_tm, "sankey": sh_sankey}}
    if not sh_master:
        raise ValueError(f"필수 시트(마스터) 미발견 | sheets={sheets}")
    df_master = _norm_cols(pd.read_excel(xls, sh_master))
    df_tm = _norm_cols(pd.read_excel(xls, sh_tm)) if sh_tm else pd.DataFrame()
    df_sankey = _norm_cols(pd.read_excel(xls, sh_sankey)) if sh_sankey else pd.DataFrame()
    # Normalize keys and rebuild hierarchy_key on every loaded frame.
    df_master = _rebuild_hkey_using_level(df_master)
    if not df_tm.empty: df_tm = _rebuild_hkey_using_level(df_tm)
    if not df_sankey.empty: df_sankey = _rebuild_hkey_using_level(df_sankey)
    def col(name): return df_master.get(name, pd.Series(np.nan, index=df_master.index))
    # Unweighted means across all master rows; SDs derived from mean CI widths.
    overall = {
        "pref_mean": float(np.nanmean(col("pref_success_rate"))),
        "rec_mean": float(np.nanmean(col("rec_success_rate"))),
        "intent_mean": float(np.nanmean(col("intent_success_rate"))),
        "buy_mean": float(np.nanmean(col("buy_success_rate"))),
        "pref_sd": float(np.nanmean(_ci_to_sd(col("pref_ci_lower"), col("pref_ci_upper")))),
        "rec_sd": float(np.nanmean(_ci_to_sd(col("rec_ci_lower"), col("rec_ci_upper")))),
        "intent_sd": float(np.nanmean(_ci_to_sd(col("intent_ci_lower"), col("intent_ci_upper")))),
        "buy_sd": float(np.nanmean(_ci_to_sd(col("buy_ci_lower"), col("buy_ci_upper")))),
    }
    # Dropdown options: "ALL" first, concrete values sorted after it.
    seg_opts = ["ALL"] + sorted([str(v) for v in df_master["segment"].dropna().unique() if str(v)!="ALL"])
    loy_opts = ["ALL"] + sorted([str(v) for v in df_master["loyalty"].dropna().unique() if str(v)!="ALL"])
    mod_opts_all = ["ALL"] + sorted([str(v) for v in df_master["model"].dropna().unique() if str(v)!="ALL"])
    return df_master, df_tm, df_sankey, overall, seg_opts, mod_opts_all, loy_opts, dbg
# ===================== ์„ ํƒ/์ง‘๊ณ„ ๋กœ์ง =====================
def pick_row_for(df_master: pd.DataFrame, seg, mod, loy):
seg = _as_all(seg); mod = _as_all(mod); loy = _as_all(loy)
df = _ensure_key_cols(df_master)
sort_col = sample_col_in_df(df)
if sort_col is None:
sort_col = "__tmp_n__"; df[sort_col] = 1
def add_pref_score(sub: pd.DataFrame) -> pd.DataFrame:
# ์‚ฌ์šฉ์ž๊ฐ€ ALL๋กœ ๋‘” ์ฐจ์›์€ ALL์„ ์„ ํ˜ธ(=๋œ ๊ตฌ์ฒด์ ์ธ ํ–‰์„ ์ƒ๋‹จ์—)
score = 0
if seg == "ALL": score += (sub["segment"]=="ALL").astype(int)
if mod == "ALL": score += (sub["model"]=="ALL").astype(int)
if loy == "ALL": score += (sub["loyalty"]=="ALL").astype(int)
sub = sub.copy(); sub["__score__"] = score
return sub
chosen = (seg!="ALL") + (mod!="ALL") + (loy!="ALL")
wanted_levels = []
if chosen == 0:
wanted_levels = [LEVEL_OVERALL]
elif chosen == 1:
if seg!="ALL": wanted_levels = [LEVEL_SEGMENT, LEVEL_OVERALL]
if mod!="ALL": wanted_levels = [LEVEL_MODEL, LEVEL_OVERALL]
if loy!="ALL": wanted_levels = [LEVEL_LOYALTY, LEVEL_OVERALL]
elif chosen == 2:
if seg!="ALL" and mod!="ALL":
wanted_levels = [LEVEL_SEG_X_MODEL, LEVEL_SEGMENT, LEVEL_MODEL, LEVEL_OVERALL]
elif seg!="ALL" and loy!="ALL":
wanted_levels = [LEVEL_SEG_X_LOY, LEVEL_SEGMENT, LEVEL_LOYALTY, LEVEL_OVERALL]
elif mod!="ALL" and loy!="ALL":
wanted_levels = [LEVEL_MODEL_X_LOY, LEVEL_MODEL, LEVEL_LOYALTY, LEVEL_OVERALL]
else:
wanted_levels = [
LEVEL_MOD_X_SEG_X_LOY, LEVEL_SEG_X_LOY, LEVEL_SEG_X_MODEL, LEVEL_MODEL_X_LOY,
LEVEL_MODEL, LEVEL_SEGMENT, LEVEL_LOYALTY, LEVEL_OVERALL
]
# 1) ๋ ˆ๋ฒจ ์šฐ์„  ๋งค์นญ
for lvl in wanted_levels:
sub = df[df["level"] == lvl]
if seg!="ALL": sub = sub[sub["segment"] == seg]
if mod!="ALL": sub = sub[sub["model"] == mod]
if loy!="ALL": sub = sub[sub["loyalty"] == loy]
if not sub.empty:
sub = add_pref_score(sub).sort_values(["__score__", sort_col], ascending=[False, False])
row = sub.iloc[0]
return row.drop(labels=[c for c in ["__score__","__tmp_n__"] if c in row.index])
# 2) ์ •ํ™• ์กฐํ•ฉ ์‹คํŒจ ์‹œ, ๋ถ€๋ถ„์กฐํ•ฉ ๋งค์นญ
sub = df.copy()
if seg!="ALL": sub = sub[sub["segment"] == seg]
if mod!="ALL": sub = sub[sub["model"] == mod]
if loy!="ALL": sub = sub[sub["loyalty"] == loy]
if not sub.empty:
sub = add_pref_score(sub).sort_values(["__score__", sort_col], ascending=[False, False])
row = sub.iloc[0]
return row.drop(labels=[c for c in ["__score__","__tmp_n__"] if c in row.index])
# 3) ๋‹จ์ผ ์ปฌ๋Ÿผ๋งŒ ๋งž๋Š” ํ–‰์ด๋ผ๋„
for col, val in [("segment", seg), ("model", mod), ("loyalty", loy)]:
if val != "ALL":
sub = df[df[col]==val]
if not sub.empty:
sub = add_pref_score(sub).sort_values(["__score__", sort_col], ascending=[False, False])
row = sub.iloc[0]
return row.drop(labels=[c for c in ["__score__","__tmp_n__"] if c in row.index])
# 4) ์™„์ „ ์‹คํŒจ ์‹œ ํ‘œ๋ณธ์ˆ˜ ์ตœ๋Œ€
row = df.sort_values(sort_col, ascending=False).iloc[0]
return row.drop(labels=[c for c in ["__score__","__tmp_n__"] if c in row.index])
# ===================== ์ฐจํŠธ/ํ‘œ ์œ ํ‹ธ =====================
def _pick_sample_for_stage(r, stage_prefix: str) -> int:
for c in [f"{stage_prefix}_sample_size", "sample_size", "n", "N", "base", "๋ฒ ์ด์Šค์ˆ˜", "ํ‘œ๋ณธ์ˆ˜"]:
if c in r and pd.notna(r.get(c)):
return _safe_int0(r.get(c))
return _safe_int0(r.get("pref_sample_size"))
def metrics_table_row(r):
    """Build the per-stage metrics table (as a DataFrame) from one master row.

    One output row per funnel stage with success rate, CI bounds, failure
    rate, pass/fail verdict, letter grade, SNR and lift. Column names are
    Korean because they are rendered directly in the UI table.
    """
    def sd_from_ci(lo, hi):
        # SD recovered from a 95% CI half-width.
        if pd.isna(lo) or pd.isna(hi): return np.nan
        return (hi - lo)/(2*1.96)
    rows = []
    # (stage label, mean col, CI-low col, CI-high col, SNR col, lift col)
    mapping = [
        ("선호", "pref_success_rate", "pref_ci_lower", "pref_ci_upper", "pref_snr", "pref_lift_vs_galaxy"),
        ("추천", "rec_success_rate", "rec_ci_lower", "rec_ci_upper", "rec_snr", "rec_lift_vs_galaxy"),
        ("구매의향", "intent_success_rate", "intent_ci_lower", "intent_ci_upper", "intent_snr", "intent_lift_vs_galaxy"),
        ("구매", "buy_success_rate", "buy_ci_lower", "buy_ci_upper", "buy_snr", "buy_lift_vs_galaxy"),
    ]
    for label, m, lo, hi, snr, lift in mapping:
        mval = _safe_num(r.get(m))
        loval = _safe_num(r.get(lo))
        hival = _safe_num(r.get(hi))
        snrval = _safe_num(r.get(snr))
        liftval= _safe_num(r.get(lift))
        stage_prefix = m.split("_")[0]  # e.g. "pref" from "pref_success_rate"
        rows.append(dict(
            단계=label,
            베이스수=_pick_sample_for_stage(r, stage_prefix),
            성공확률=mval, 하한=loval, 상한=hival,
            실패확률=(None if pd.isna(mval) else 1-mval),
            판정=("성공" if (np.isfinite(mval) and mval>=0.5) else ("실패" if np.isfinite(mval) else "N/A")),
            평가등급=("N/A" if not np.isfinite(mval) else ("A" if mval>=0.70 else "B" if mval>=0.55 else "C" if mval>=0.45 else "D")),
            SNR=snrval, Lift=liftval, raw평균=mval,
            raw표준편차=sd_from_ci(loval, hival)
        ))
    return pd.DataFrame(rows)
def drops_from_anywhere(row, df_tm, seg, mod, loy):
    """Return (drop1, drop2, drop3, full_conversion) for the selection.

    Values come from *row* when present; any missing value is backfilled with
    a sample-size-weighted mean over the matching rows of the transition-matrix
    frame (falling back to the whole frame when the filter matches nothing).
    """
    seg = _as_all(seg); mod = _as_all(mod); loy = _as_all(loy)
    d1 = _safe_num(row.get("bayesian_dropout_pref_to_rec"))
    d2 = _safe_num(row.get("bayesian_dropout_rec_to_intent"))
    d3 = _safe_num(row.get("bayesian_dropout_intent_to_buy"))
    full = _safe_num(row.get("bayesian_full_conversion"))
    if df_tm is None or df_tm.empty:
        return d1, d2, d3, full
    need = [np.isfinite(d1), np.isfinite(d2), np.isfinite(d3), np.isfinite(full)]
    if all(need): return d1, d2, d3, full  # nothing missing: no backfill needed
    # Filter the transition matrix down to the selection.
    m = pd.Series(True, index=df_tm.index)
    if "segment" in df_tm and seg!="ALL": m &= (df_tm["segment"].astype(str)==seg)
    if "model" in df_tm and mod!="ALL": m &= (df_tm["model"].astype(str)==mod)
    if "loyalty" in df_tm and loy!="ALL": m &= (df_tm["loyalty"].astype(str)==loy)
    sub = df_tm[m].copy()
    if sub.empty: sub = df_tm.copy()
    w = pd.to_numeric(sub.get("pref_sample_size", pd.Series(1, index=sub.index)), errors="coerce").fillna(1)
    def wmean(col):
        # Sample-size-weighted mean of *col*; NaN when the column has no data.
        v = pd.to_numeric(sub.get(col, pd.Series(np.nan, index=sub.index)), errors="coerce")
        if v.notna().any(): return float(np.nansum(v*w)/np.nansum(w))
        return np.nan
    d1 = d1 if np.isfinite(d1) else wmean("bayesian_dropout_pref_to_rec")
    d2 = d2 if np.isfinite(d2) else wmean("bayesian_dropout_rec_to_intent")
    d3 = d3 if np.isfinite(d3) else wmean("bayesian_dropout_intent_to_buy")
    full = full if np.isfinite(full) else wmean("bayesian_full_conversion")
    return d1, d2, d3, full
def biggest_drop_text_by_sources(row, df_tm, seg, mod, loy):
    """Human-readable description of the largest stage-to-stage dropout."""
    d1, d2, d3, _ = drops_from_anywhere(row, df_tm, seg, mod, loy)
    labeled = [("선호→추천", d1), ("추천→구매의향", d2), ("구매의향→구매", d3)]
    labeled = [(label, value) for label, value in labeled if np.isfinite(value)]
    if not labeled:
        return "데이터 없음"
    name, val = max(labeled, key=lambda item: item[1])
    base_n = _safe_int0(row.get("pref_sample_size"))
    return f"{name}에서 {val*100:.1f}%p 손실 (샘플 {base_n:,})"
def compose_composite_row(df_scope: pd.DataFrame) -> pd.Series:
    """Aggregate several master rows into one composite row.

    Means, SNRs, lifts and dropouts are sample-size-weighted averages; CI
    bounds are recombined from a pooled variance (within-row variance implied
    by the CI width plus between-row variance around the weighted mean).
    Returns an empty Series for empty input.
    """
    if df_scope is None or df_scope.empty:
        return pd.Series(dtype=float)
    s = df_scope.copy()
    # Normalized sample-size weights (guarding against a zero/invalid total).
    w = pd.to_numeric(s.get("pref_sample_size", pd.Series(1, index=s.index)), errors="coerce").fillna(1.0)
    w_sum = float(np.nansum(w)) if np.isfinite(np.nansum(w)) and np.nansum(w) > 0 else 1.0
    w_norm = w / w_sum
    def wmean(col):
        # Weighted mean of *col*; NaN when the column carries no data.
        v = pd.to_numeric(s.get(col, pd.Series(np.nan, index=s.index)), errors="coerce")
        if v.notna().any(): return float(np.nansum(v * w_norm))
        return np.nan
    def combine_ci(lo_col, hi_col, mean_col):
        # Pool per-row CIs: variance = E[sd^2] + Var(mean) under the weights.
        m = pd.to_numeric(s.get(mean_col, pd.Series(np.nan, index=s.index)), errors="coerce")
        lo = pd.to_numeric(s.get(lo_col, pd.Series(np.nan, index=s.index)), errors="coerce")
        hi = pd.to_numeric(s.get(hi_col, pd.Series(np.nan, index=s.index)), errors="coerce")
        if not (m.notna().any() and lo.notna().any() and hi.notna().any()):
            return np.nan, np.nan
        m_bar = float(np.nansum(m * w_norm))
        sd = (hi - lo) / (2 * 1.96)  # SD implied by each row's 95% CI width
        sd = pd.to_numeric(sd, errors="coerce")
        var = np.nansum(w_norm * (sd**2 + (m - m_bar)**2))
        sd_c = float(np.sqrt(var)) if np.isfinite(var) else np.nan
        if not np.isfinite(sd_c): return np.nan, np.nan
        return (m_bar - 1.96 * sd_c), (m_bar + 1.96 * sd_c)
    pref_m = wmean("pref_success_rate")
    rec_m = wmean("rec_success_rate")
    intent_m = wmean("intent_success_rate")
    buy_m = wmean("buy_success_rate")
    pref_lo, pref_hi = combine_ci("pref_ci_lower", "pref_ci_upper", "pref_success_rate")
    rec_lo, rec_hi = combine_ci("rec_ci_lower", "rec_ci_upper", "rec_success_rate")
    intent_lo, intent_hi = combine_ci("intent_ci_lower", "intent_ci_upper", "intent_success_rate")
    buy_lo, buy_hi = combine_ci("buy_ci_lower", "buy_ci_upper", "buy_success_rate")
    d1 = wmean("bayesian_dropout_pref_to_rec")
    d2 = wmean("bayesian_dropout_rec_to_intent")
    d3 = wmean("bayesian_dropout_intent_to_buy")
    full = wmean("bayesian_full_conversion")
    pref_snr = wmean("pref_snr"); rec_snr = wmean("rec_snr")
    intent_snr = wmean("intent_snr"); buy_snr = wmean("buy_snr")
    pref_lift = wmean("pref_lift_vs_galaxy"); rec_lift = wmean("rec_lift_vs_galaxy")
    intent_lift = wmean("intent_lift_vs_galaxy"); buy_lift = wmean("buy_lift_vs_galaxy")
    # Composite row mirrors the master-table schema; sample size is the raw total.
    out = {
        "pref_sample_size": float(np.nansum(w)),
        "pref_success_rate": pref_m, "pref_ci_lower": pref_lo, "pref_ci_upper": pref_hi,
        "rec_success_rate": rec_m, "rec_ci_lower": rec_lo, "rec_ci_upper": rec_hi,
        "intent_success_rate": intent_m, "intent_ci_lower": intent_lo, "intent_ci_upper": intent_hi,
        "buy_success_rate": buy_m, "buy_ci_lower": buy_lo, "buy_ci_upper": buy_hi,
        "bayesian_dropout_pref_to_rec": d1,
        "bayesian_dropout_rec_to_intent": d2,
        "bayesian_dropout_intent_to_buy": d3,
        "bayesian_full_conversion": full,
        "pref_snr": pref_snr, "rec_snr": rec_snr, "intent_snr": intent_snr, "buy_snr": buy_snr,
        "pref_lift_vs_galaxy": pref_lift, "rec_lift_vs_galaxy": rec_lift,
        "intent_lift_vs_galaxy": intent_lift, "buy_lift_vs_galaxy": buy_lift,
    }
    return pd.Series(out)
# ===================== ์ฐจํŠธ =====================
def _empty_fig(msg="Load data first", height=360, hide_axes=False):
fig = go.Figure()
fig.add_annotation(text=msg, x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_layout(
height=height,
margin=dict(l=10, r=10, t=30, b=10),
paper_bgcolor="#ffffff",
plot_bgcolor="#ffffff",
uirevision="keep",
)
fig = apply_dense_grid(fig) # ๊ธฐ์กด ์Šคํƒ€์ผ ์œ ์ง€
if hide_axes: # Sankey ๋“ฑ ์นดํ…Œ์‹œ์•ˆ ์ถ•์ด ๋ถˆํ•„์š”ํ•œ ๊ฒฝ์šฐ
fig.update_xaxes(visible=False, showgrid=False, zeroline=False)
fig.update_yaxes(visible=False, showgrid=False, zeroline=False)
return fig
def hex_to_rgba(hex_color: str, a: float | None = None) -> str:
s = hex_color.strip().lstrip("#")
if len(s) in (3, 4):
s = "".join(ch * 2 for ch in s)
if len(s) == 6:
r = int(s[0:2], 16); g = int(s[2:4], 16); b = int(s[4:6], 16)
alpha = 1.0 if a is None else float(a)
elif len(s) == 8:
r = int(s[0:2], 16); g = int(s[2:4], 16); b = int(s[4:6], 16)
hex_alpha = int(s[6:8], 16) / 255.0
alpha = hex_alpha if a is None else float(a)
else:
raise ValueError("hex must be #RGB, #RRGGBB, or #RRGGBBAA")
alpha = max(0.0, min(1.0, alpha))
return f"rgba({r},{g},{b},{alpha:.3g})"
def _normalize_stage_label(v: str) -> str | None:
if v is None:
return None
s = str(v).strip().lower()
s = re.sub(r'[\s\-\_]+', ' ', s) # ๊ณต๋ฐฑ/-,_ ์ •๋ฆฌ
joined = s.replace(' ', '')
# ์ „์ฒด
if any(k in (s, joined) for k in [
"overall","total","all","์ „์ฒด","์ „์ฒด์‚ฌ์šฉ์ž","๋ชจ๋“ ์‚ฌ์šฉ์ž","allusers","all user","all-user"
]):
return "์ „์ฒด"
# ๋ฏธ์„ ํ˜ธ(๋น„์„ ํ˜ธ/ํƒˆ๋ฝ/๋“œ๋ž/No preference ๋“ฑ)
if any(k in (s, joined) for k in [
"๋ฏธ์„ ํ˜ธ","๋น„์„ ํ˜ธ","์„ ํ˜ธ์•„๋‹˜","์„ ํ˜ธ ์•„๋‹˜",
"nopref","no preference","dislike","ํƒˆ๋ฝ","drop","dropped"
]):
return "๋ฏธ์„ ํ˜ธ"
# ๊ตฌ๋งค์˜ํ–ฅ(์˜ํ–ฅ/์˜๋„/์˜์‚ฌ/intent ๊ณ„์—ด)
if ("์˜ํ–ฅ" in s) or ("์˜๋„" in s) or ("์˜์‚ฌ" in s) \
or ("intent" in s) or ("intention" in s) \
or ("purchaseintent" in joined) or ("purchase-intent" in s):
return "๊ตฌ๋งค์˜ํ–ฅ"
# ๊ตฌ๋งค(์‹ค์ œ๊ตฌ๋งค/๊ตฌ๋งค์™„๋ฃŒ/๊ตฌ๋งคํ™•์ •/๊ตฌ์ž…/๊ฒฐ์ œ/๋งค์ถœ/buy/purchase)
if ("๊ตฌ๋งค" in s) or ("๊ตฌ์ž…" in s) or ("๊ฒฐ์ œ" in s) or ("๊ฒฐ์žฌ" in s) or ("๋งค์ถœ" in s) \
or (s == "buy") or ("purchase" in s):
return "๊ตฌ๋งค"
# ์„ ํ˜ธ
if ("์„ ํ˜ธ" in s) or ("ํ˜ธ๊ฐ" in s) or ("preference" in s) or (s == "pref"):
return "์„ ํ˜ธ"
# ์ถ”์ฒœ
if (s == "rec") or ("recommend" in s) or ("์ถ”์ฒœ" in s):
return "์ถ”์ฒœ"
return None
# ==== STAGES & ORDER (๊ธฐ์กด ๊ฒƒ์„ ๊ต์ฒด) ====
STAGES = ["์ „์ฒด", "๋ฏธ์„ ํ˜ธ", "์„ ํ˜ธ", "์ถ”์ฒœ", "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค"]
ORDER = {v:i for i,v in enumerate(STAGES)}
# ์ƒ‰์ƒ ํ•˜๋‚˜ ์ถ”๊ฐ€(์€์€ํ•œ ํšŒ์ƒ‰ ๊ณ„์—ด ๊ถŒ์žฅ)
COL_STAGE_DROP = "#CBD5E1" # ๋ฏธ์„ ํ˜ธ
def _group_forward_flows(df_sankey, seg, mod, loy):
    """Aggregate forward-only (lower→higher stage) flows for one selection.

    Filters ``df_sankey`` by segment/model/loyalty ("ALL" = no filter),
    canonicalizes stage labels via an exact alias table, sums a count-like
    column per (from_stage, to_stage) pair, backfills stages with no inflow
    using synthetic ์ „์ฒด→stage links, and attaches
    ``flow_phi`` = count * _flow_scale(seg, mod, loy).
    Returns an empty frame with the expected columns when nothing matches.
    """
    if df_sankey is None or df_sankey.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","flow_phi"])
    seg = _as_all(seg); mod = _as_all(mod); loy = _as_all(loy)
    s = df_sankey.copy()
    m = pd.Series(True, index=s.index)
    if "segment" in s and seg!="ALL": m &= (s["segment"].astype(str)==seg)
    if "model" in s and mod!="ALL": m &= (s["model"].astype(str)==mod)
    if "loyalty" in s and loy!="ALL": m &= (s["loyalty"].astype(str)==loy)
    s = s[m].copy()
    if s.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","flow_phi"])
    # Exact-string aliases -> canonical stage names
    alias = {
        "all":"์ „์ฒด","ALL":"์ „์ฒด","์ „์ฒด":"์ „์ฒด",
        "pref":"์„ ํ˜ธ","preference":"์„ ํ˜ธ","์„ ํ˜ธ๋„":"์„ ํ˜ธ",
        "rec":"์ถ”์ฒœ","recommend":"์ถ”์ฒœ","์ถ”์ฒœ๋„":"์ถ”์ฒœ",
        "intent":"๊ตฌ๋งค์˜ํ–ฅ","intention":"๊ตฌ๋งค์˜ํ–ฅ","๊ตฌ๋งค์˜๋„":"๊ตฌ๋งค์˜ํ–ฅ",
        "purchase":"๊ตฌ๋งค","buy":"๊ตฌ๋งค","์‹ค์ œ๊ตฌ๋งค":"๊ตฌ๋งค"
    }
    # NOTE(review): assumes at least one of from_stage/from/source (and
    # to_stage/to/target) exists; the .get chain yields None otherwise — confirm upstream.
    s["from_stage"] = s.get("from_stage", s.get("from", s.get("source"))).astype(str).str.strip().replace(alias)
    s["to_stage"] = s.get("to_stage", s.get("to", s.get("target"))).astype(str).str.strip().replace(alias)
    # Accept aliases for the count column (covers raw and cached sheets)
    cnt_cands = ["bayesian_flow_count","count","value","weight","n","freq"]
    cnt_col = next((c for c in cnt_cands if c in s.columns), None)
    if cnt_col is None:
        return pd.DataFrame(columns=["from_stage","to_stage","count","flow_phi"])
    s[cnt_col] = pd.to_numeric(s[cnt_col], errors="coerce")
    s = s[np.isfinite(s[cnt_col]) & (s[cnt_col]>0)]
    # keep canonical stages and forward (lower -> higher order) links only
    s = s[s["from_stage"].isin(STAGES) & s["to_stage"].isin(STAGES)]
    s = s[s.apply(lambda r: ORDER[r["from_stage"]] < ORDER[r["to_stage"]], axis=1)]
    if s.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","flow_phi"])
    g = (s.groupby(["from_stage","to_stage"], as_index=False)[cnt_col]
         .sum().rename(columns={cnt_col:"count"}))
    # [Backfill stages with no inflow] add a synthetic ์ „์ฒด→stage link sized
    # by that stage's outgoing total, so the Sankey stays connected.
    pairs = set(zip(g["from_stage"], g["to_stage"]))
    def _has_incoming(stage):
        # True when any earlier stage already flows into `stage`
        k = ORDER[stage]
        return any((prev, stage) in pairs for prev in STAGES[:k])
    add_rows = []
    for st in STAGES[1:]:
        if not _has_incoming(st):
            out_sum = float(g.loc[g["from_stage"] == st, "count"].sum())
            if out_sum > 0:
                add_rows.append({"from_stage": "์ „์ฒด", "to_stage": st, "count": out_sum})
    if add_rows:
        g = pd.concat([g, pd.DataFrame(add_rows)], ignore_index=True)
    # Apply the (non-public) phi display scale
    k = _flow_scale(seg, mod, loy)
    g["flow_phi"] = g["count"].astype(float) * k
    return g
# ===== Table builder used inside the Sankey (indirect links included; optional collapse into ๊ตฌ๋งค) =====
# Node (beige) & link (grey) palette.
# NOTE: these names are re-assigned again further below; the last assignment
# in the module wins at import time.
COL_STAGE_OVERALL = "#B68E5C"   # overall
COL_STAGE_PREF = "#C6955E"      # preference
COL_STAGE_REC = "#D5A86D"       # recommendation
COL_STAGE_INTENT = "#BE8F4E"    # purchase intent
COL_STAGE_BUY = "#A97F45"       # purchase
COL_LINK_DIRECT = "#4B5563"     # direct link (dark grey)
COL_LINK_INDIRECT = "#D1D5DB"   # indirect link (light grey)
def _sankey_build_table(df_sankey, seg="ALL", mod="ALL", loy="ALL",
                        collapse_to_buy=True, collapse_from=("์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ")) -> pd.DataFrame:
    """Build a normalized forward-flow table from raw Sankey rows.

    Pipeline: unify column aliases, filter by the (seg, mod, loy) selection,
    canonicalize stage labels, keep forward links with positive counts,
    aggregate per (from, to) pair, backfill stages with no inflow via
    synthetic ์ „์ฒด→stage links, and optionally append collapsed links into
    ๊ตฌ๋งค from the stages in ``collapse_from``.
    Output columns: [from_stage, to_stage, count, dist, kind, to_buy, flow_phi].
    """
    if df_sankey is None or df_sankey.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"])
    s = df_sankey.copy()
    # --- Compatibility guard: map column aliases onto the standard names ---
    # 1) from/to aliases -> from_stage/to_stage
    from_col = next((c for c in ["from_stage","from","source","src"] if c in s.columns), None)
    to_col = next((c for c in ["to_stage","to","target","dst"] if c in s.columns), None)
    if from_col and from_col != "from_stage":
        s = s.rename(columns={from_col: "from_stage"})
    if to_col and to_col != "to_stage":
        s = s.rename(columns={to_col: "to_stage"})
    # Without both stage columns the table cannot be built (safety guard)
    if "from_stage" not in s.columns or "to_stage" not in s.columns:
        return pd.DataFrame(columns=["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"])
    # 2) numeric-column aliases -> bayesian_flow_count
    alt_cnt = next((c for c in ["bayesian_flow_count","count","value","flow","weight","n","freq"]
                    if c in s.columns), None)
    if alt_cnt and alt_cnt != "bayesian_flow_count":
        s = s.rename(columns={alt_cnt: "bayesian_flow_count"})
    # Filter by the current selection ("ALL" means no filter)
    for col, val in (("segment", seg), ("model", mod), ("loyalty", loy)):
        if col in s.columns and str(val) != "ALL":
            s = s[s[col].astype(str) == str(val)]
    if s.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"])
    # Canonicalize labels, then keep forward (lower -> higher) links only.
    # NOTE(review): the .get fallbacks are redundant here — the rename above
    # already guarantees from_stage/to_stage exist.
    s["from_stage"] = s.get("from_stage", s.get("from", s.get("source"))).map(_normalize_stage_label)
    s["to_stage"] = s.get("to_stage", s.get("to", s.get("target"))).map(_normalize_stage_label)
    s = s.dropna(subset=["from_stage","to_stage"])
    s = s[s["from_stage"].isin(STAGES) & s["to_stage"].isin(STAGES)]
    s = s[s.apply(lambda r: ORDER[r["from_stage"]] < ORDER[r["to_stage"]], axis=1)]
    # Accept count-column aliases (covers both raw and cached sheets)
    cnt_cands = ["bayesian_flow_count", "count", "value", "weight", "n", "freq"]
    cnt_col = next((c for c in cnt_cands if c in s.columns), None)
    if cnt_col is None:
        return pd.DataFrame(columns=["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"])
    s[cnt_col] = pd.to_numeric(s[cnt_col], errors="coerce")
    s = s[np.isfinite(s[cnt_col]) & (s[cnt_col] > 0)]
    if s.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"])
    # Base aggregation per (from, to) pair
    g = (s.groupby(["from_stage","to_stage"], as_index=False)[cnt_col]
         .sum().rename(columns={cnt_col:"count"}))
    # Backfill stages that have no inflow with a synthetic ์ „์ฒด→stage link
    pairs = set(zip(g["from_stage"], g["to_stage"]))
    def _has_incoming(stage):
        # True when any earlier stage already flows into `stage`
        k = ORDER[stage]
        return any((prev, stage) in pairs for prev in STAGES[:k])
    add_rows = []
    for st in STAGES[1:]:
        if not _has_incoming(st):
            out_sum = float(g.loc[g["from_stage"]==st, "count"].sum())
            if out_sum > 0:
                add_rows.append({"from_stage":"์ „์ฒด","to_stage":st,"count":out_sum})
    if add_rows:
        g = pd.concat([g, pd.DataFrame(add_rows)], ignore_index=True)
    # (option) collapsed indirect links into ๊ตฌ๋งค: each listed stage gets a
    # link carrying the total inflow into ๊ตฌ๋งค, when one does not exist yet
    if collapse_to_buy:
        buy_in = float(pd.to_numeric(g.loc[g["to_stage"]=="๊ตฌ๋งค","count"], errors="coerce").fillna(0).sum())
        if buy_in > 0:
            exist = set(zip(g["from_stage"], g["to_stage"]))
            extra = []
            for st in collapse_from:
                if st in ORDER and (st, "๊ตฌ๋งค") not in exist and ORDER[st] < ORDER["๊ตฌ๋งค"]:
                    extra.append({"from_stage": st, "to_stage": "๊ตฌ๋งค", "count": buy_in})
            if extra:
                g = pd.concat([g, pd.DataFrame(extra)], ignore_index=True)
    # Meta columns
    kphi = _flow_scale(seg, mod, loy)  # non-public display scale
    g["flow_phi"] = g["count"].astype(float) * kphi
    g["dist"] = g["to_stage"].map(ORDER) - g["from_stage"].map(ORDER)
    g["kind"] = np.where(g["dist"]==1, "์ง์ ‘", "๊ฐ„์ ‘")
    g["to_buy"] = (g["to_stage"] == "๊ตฌ๋งค")
    cols = ["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"]
    return g[cols].sort_values(["dist","from_stage","to_stage"]).reset_index(drop=True)
# ====== Sankey colors / stages ======
# NOTE: redefines STAGES/ORDER and the COL_STAGE_* palette declared earlier;
# being later in the module, these are the values in effect at import time.
STAGES = ["์ „์ฒด","๋ฏธ์„ ํ˜ธ","์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ","๊ตฌ๋งค"]
ORDER = {v:i for i,v in enumerate(STAGES)}
COL_STAGE_OVERALL = "#B68E5C"
COL_STAGE_NONPREF = "#9CA3AF"   # non-preference (grey)
COL_STAGE_PREF = "#C6955E"
COL_STAGE_REC = "#D5A86D"
COL_STAGE_INTENT = "#BE8F4E"
COL_STAGE_BUY = "#A97F45"
COL_LINK_DIRECT = "#4B5563"     # dark grey (direct links)
COL_LINK_INDIRECT = "#D1D5DB"   # light grey (indirect links)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# (์œ ํ‹ธ) ๊ฐ„์ ‘ "โ†’๊ตฌ๋งค" ์ ‘๊ธฐ ๋ณด๊ฐ•
def add_collapsed_to_buy(tbl: pd.DataFrame, add_from=("์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ")) -> pd.DataFrame:
    """Backfill collapsed "→๊ตฌ๋งค" links into an existing flow table.

    For every stage in ``add_from`` with no existing link into ๊ตฌ๋งค, append a
    synthetic link carrying the total inflow into ๊ตฌ๋งค. The phi scale is
    re-estimated as the median flow_phi/count ratio of the table (default 1.0),
    and group meta columns (segment/model/loyalty/level) are carried over when
    they hold a single value, otherwise set to "ALL".
    """
    if tbl is None or tbl.empty:
        return tbl
    # canonical 6-stage order (includes non-preference)
    stage_names = ["์ „์ฒด","๋ฏธ์„ ํ˜ธ","์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ","๊ตฌ๋งค"]
    stage_idx = {name: i for i, name in enumerate(stage_names)}
    buy_rank = stage_idx["๊ตฌ๋งค"]
    work = tbl.copy()
    # total volume currently flowing into ๊ตฌ๋งค
    inflow = pd.to_numeric(work.loc[work["to_stage"] == "๊ตฌ๋งค", "count"], errors="coerce").fillna(0)
    buy_total = float(inflow.sum())
    # estimate the phi scale from existing rows (median ratio; default 1.0)
    scale = 1.0
    if {"flow_phi", "count"}.issubset(work.columns):
        ratios = (pd.to_numeric(work["flow_phi"], errors="coerce")
                  / pd.to_numeric(work["count"], errors="coerce"))
        ratios = ratios.replace([np.inf, -np.inf], np.nan).dropna()
        if not ratios.empty:
            scale = float(np.median(ratios))
    # snapshot of group meta: a column's single unique value, else "ALL"
    meta = {}
    for col in ("segment", "model", "loyalty", "level"):
        if col in work.columns:
            meta[col] = work[col].dropna().iloc[0] if work[col].nunique(dropna=True) == 1 else "ALL"
    new_rows = []
    for stage in add_from:
        rank = stage_idx.get(stage)
        if rank is None or rank >= buy_rank:
            continue
        # never duplicate a link that already exists
        if ((work["from_stage"] == stage) & (work["to_stage"] == "๊ตฌ๋งค")).any():
            continue
        gap = buy_rank - rank
        entry = dict(meta)
        entry.update({
            "from_stage": stage,
            "to_stage": "๊ตฌ๋งค",
            "count": buy_total,
            "dist": gap,
            "kind": "๊ฐ„์ ‘" if gap > 1 else "์ง์ ‘",
            "to_buy": True,
            "flow_phi": buy_total * scale,
        })
        new_rows.append(entry)
    if new_rows:
        work = pd.concat([work, pd.DataFrame(new_rows)], ignore_index=True)
    return work.sort_values(["dist","from_stage","to_stage"]).reset_index(drop=True)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# โฌ‡โฌ‡ ํ•ต์‹ฌ ์ˆ˜์ •: ๋ผ๋ฒจ์„ ๋จผ์ € ๋А์Šจํ•œ ๋ณ„์นญ์œผ๋กœ ์น˜ํ™˜ ํ›„, ์ •๊ทœํ™” ํ•จ์ˆ˜์— ํƒœ์›€
def _normalize_stage_soft(series: pd.Series) -> pd.Series:
if series.empty:
return series
s = series.astype(str).str.strip()
# 1) ๊ฐ•์ œ ๋ณ„์นญ(์ •ํ™•์น˜ํ™˜) โ€” ์˜ํ–ฅ/์˜๋„/์˜์‚ฌ/intent, ๊ตฌ๋งค์™„๋ฃŒ/์‹ค์ œ๊ตฌ๋งค, ์ „์ฒด์‚ฌ์šฉ์ž ๋“ฑ
alias_exact = {
# ์ „์ฒด
"์ „์ฒด์‚ฌ์šฉ์ž": "์ „์ฒด", "๋ชจ๋“  ์‚ฌ์šฉ์ž": "์ „์ฒด", "all": "์ „์ฒด", "ALL": "์ „์ฒด",
# ์„ ํ˜ธ
"์„ ํ˜ธ๋„": "์„ ํ˜ธ", "์„ ํ˜ธ๋„๋†’์Œ": "์„ ํ˜ธ", "ํ˜ธ๊ฐ๋„": "์„ ํ˜ธ", "ํ˜ธ๊ฐ๋„๋†’์Œ": "์„ ํ˜ธ",
# ์ถ”์ฒœ
"์ถ”์ฒœ๋„": "์ถ”์ฒœ", "์ถ”์ฒœ๋„๋†’์Œ": "์ถ”์ฒœ",
# ์˜ํ–ฅ/์˜๋„/์˜์‚ฌ/intent (๋‹ค์–‘ํ˜•)
"๊ตฌ๋งค์˜ํ–ฅ": "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค ์˜ํ–ฅ": "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค์˜ํ–ฅ๋†’์Œ": "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค์˜ํ–ฅ ๋†’์Œ": "๊ตฌ๋งค์˜ํ–ฅ",
"๊ตฌ๋งค์˜๋„": "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค ์˜๋„": "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค์˜๋„๋†’์Œ": "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค์˜๋„ ๋†’์Œ": "๊ตฌ๋งค์˜ํ–ฅ",
"๊ตฌ๋งค์˜์‚ฌ": "๊ตฌ๋งค์˜ํ–ฅ", "์˜์‚ฌ ์žˆ์Œ": "๊ตฌ๋งค์˜ํ–ฅ",
"intent": "๊ตฌ๋งค์˜ํ–ฅ", "Intent": "๊ตฌ๋งค์˜ํ–ฅ", "Intention": "๊ตฌ๋งค์˜ํ–ฅ",
"Purchase Intent": "๊ตฌ๋งค์˜ํ–ฅ", "PURCHASE_INTENT": "๊ตฌ๋งค์˜ํ–ฅ",
# ๊ตฌ๋งค
"์‹ค์ œ๊ตฌ๋งค": "๊ตฌ๋งค", "๊ตฌ๋งค ํ™•์ •": "๊ตฌ๋งค", "๊ตฌ๋งคํ™•์ •": "๊ตฌ๋งค", "๊ตฌ๋งค ์™„๋ฃŒ": "๊ตฌ๋งค", "๊ตฌ๋งค์™„๋ฃŒ": "๊ตฌ๋งค",
"๊ฒฐ์ œ": "๊ตฌ๋งค", "๊ฒฐ์žฌ": "๊ตฌ๋งค", "๋งค์ถœ": "๊ตฌ๋งค",
#๋ฏธ์„ ํ˜ธ
"๋ฏธ์„ ํ˜ธ": "๋ฏธ์„ ํ˜ธ", "๋น„์„ ํ˜ธ": "๋ฏธ์„ ํ˜ธ", "์„ ํ˜ธ ์•„๋‹˜": "๋ฏธ์„ ํ˜ธ", "ํƒˆ๋ฝ": "๋ฏธ์„ ํ˜ธ",
}
s = s.replace(alias_exact)
# 2) ํ† ํฐ/๋ถ€๋ถ„์ผ์น˜ ๊ธฐ๋ฐ˜ ์ •๊ทœํ™”(์ „์—ญ ํ•จ์ˆ˜๊ฐ€ ์žˆ์œผ๋ฉด ์žฌ์‚ฌ์šฉ)
def _norm_one(x: str) -> str | None:
try:
return _normalize_stage_label(x) # ์ „์—ญ ์ •์˜ ์กด์žฌ ์‹œ ํ™œ์šฉ
except Exception:
pass
# ํด๋ฐฑ: ๋ถ€๋ถ„์ผ์น˜
xl = x.lower().replace(" ", "")
if any(k in xl for k in ["all","์ „์ฒด"]): return "์ „์ฒด"
if any(k in xl for k in ["์„ ํ˜ธ","ํ˜ธ๊ฐ"]): return "์„ ํ˜ธ"
if "์ถ”์ฒœ" in xl or "rec" in xl: return "์ถ”์ฒœ"
if any(k in xl for k in ["์˜ํ–ฅ","์˜๋„","์˜์‚ฌ","intent"]): return "๊ตฌ๋งค์˜ํ–ฅ"
if any(k in xl for k in ["๊ตฌ๋งค","๊ตฌ์ž…","๊ฒฐ์ œ","๊ฒฐ์žฌ","์™„๋ฃŒ","ํ™•์ •","๋งค์ถœ","purch","buy"]): return "๊ตฌ๋งค"
if any(k in xl for k in ["๋ฏธ์„ ํ˜ธ","๋น„์„ ํ˜ธ","์„ ํ˜ธ์•„๋‹˜","nopref","npreference","ํƒˆ๋ฝ","drop"]): return "๋ฏธ์„ ํ˜ธ"
return None
return s.map(_norm_one)
# ํŒŒ์ผ ์ƒ๋‹จ ์–ด๋”˜๊ฐ€(์ƒ์ˆ˜๋“ค ๊ทผ์ฒ˜)์— ์ถ”๊ฐ€
LVL_PRIORITY = [
"๋ชจ๋ธร—์„ธ๊ทธร—์ถฉ์„ฑ๋„","์„ธ๊ทธร—๋ชจ๋ธ","๋ชจ๋ธร—์ถฉ์„ฑ๋„","์„ธ๊ทธร—์ถฉ์„ฑ๋„",
"๋ชจ๋ธ","์„ธ๊ทธ๋จผํŠธ","์ถฉ์„ฑ๋„","์ „์ฒด"
]
def _sanitize_sankey_table(
    tbl: pd.DataFrame,
    seg="ALL", mod="ALL", loy="ALL",
    enforce_single_level: bool = True,
    drop_overall_if_mixed: bool = True
) -> pd.DataFrame:
    """Clean an already-built (possibly cached) Sankey flow table.

    Applies the selection filter, reduces mixed ``level`` values to the most
    specific one (per LVL_PRIORITY), unifies column aliases, normalizes stage
    labels, keeps forward links with positive counts, and backfills
    dist/kind/to_buy/flow_phi. Falls back to progressively looser filtering
    instead of returning empty when a strict pass drops every row.
    """
    cols = ["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"]
    if tbl is None or tbl.empty:
        return pd.DataFrame(columns=cols)
    t = tbl.copy()
    # (1) filter by the current selection (only when the column exists)
    for col, val in (("segment", seg), ("model", mod), ("loyalty", loy)):
        if col in t.columns and str(val) != "ALL":
            t = t[t[col].astype(str).str.strip() == str(val)]
    if t.empty:
        return pd.DataFrame(columns=cols)
    # (2) reduce to a single level (avoid mixing) + soften over-aggressive drops
    original = t
    if enforce_single_level and "level" in t.columns:
        picked = None
        for lv in LVL_PRIORITY:
            cand = t[t["level"].astype(str) == lv]
            if not cand.empty:
                picked = cand; break
        if picked is not None:
            t = picked
    # if levels are still mixed, drop only '์ „์ฒด' (unless that empties the table)
    if ("level" in t.columns) and (t["level"].astype(str).nunique() > 1) and drop_overall_if_mixed:
        t2 = t[t["level"].astype(str) != "์ „์ฒด"]
        if not t2.empty:
            t = t2
    if t.empty:
        # over-filtering/dropping left nothing: fall back to the original rows
        t = original.copy()
    # (3) column aliases
    alias = {
        "from_stage": ["from_stage","from","source","src"],
        "to_stage": ["to_stage","to","target","dst"],
        "count": ["count","bayesian_flow_count","flow","value","weight","n","freq"],
    }
    def pick(name):
        # case-insensitive lookup of the first matching alias for `name`
        keys = {str(c).strip().lower(): c for c in t.columns}
        for a in alias[name]:
            if a in keys: return keys[a]
        return None
    c_from = pick("from_stage"); c_to = pick("to_stage"); c_cnt = pick("count")
    if not all([c_from, c_to, c_cnt]):
        return pd.DataFrame(columns=cols)
    t = t.rename(columns={c_from:"from_stage", c_to:"to_stage", c_cnt:"count"})
    # (4) normalize stage labels; forward links only
    t["from_stage"] = _normalize_stage_soft(t["from_stage"])
    t["to_stage"] = _normalize_stage_soft(t["to_stage"])
    t = t.dropna(subset=["from_stage","to_stage"])
    t = t[t["from_stage"].isin(STAGES) & t["to_stage"].isin(STAGES)]
    t = t[t.apply(lambda r: ORDER[r["from_stage"]] < ORDER[r["to_stage"]], axis=1)]
    # (5) numeric conversion
    t["count"] = pd.to_numeric(t["count"], errors="coerce")
    t = t[np.isfinite(t["count"]) & (t["count"] > 0)]
    # (5-relaxed) everything filtered out: reapply only the stage conditions
    # (drops the forward-only requirement) and coerce counts to zero-filled
    if t.empty:
        t = original.rename(columns={c_from:"from_stage", c_to:"to_stage", c_cnt:"count"}).copy()
        t["from_stage"] = _normalize_stage_soft(t["from_stage"])
        t["to_stage"] = _normalize_stage_soft(t["to_stage"])
        t = t.dropna(subset=["from_stage","to_stage"])
        t = t[t["from_stage"].isin(STAGES) & t["to_stage"].isin(STAGES)]
        t["count"] = pd.to_numeric(t["count"], errors="coerce").fillna(0)
        t = t[t["count"] > 0]
        if t.empty:
            return pd.DataFrame(columns=cols)
    # (6) backfill meta columns
    t["dist"] = (t["to_stage"].map(ORDER) - t["from_stage"].map(ORDER)).astype(int)
    if "kind" not in t.columns:
        t["kind"] = np.where(t["dist"]==1, "์ง์ ‘", "๊ฐ„์ ‘")
    else:
        # repair rows whose kind is missing/unknown
        miss = ~t["kind"].astype(str).isin(["์ง์ ‘","๊ฐ„์ ‘"])
        t.loc[miss,"kind"] = np.where(t.loc[miss,"dist"]==1, "์ง์ ‘","๊ฐ„์ ‘")
    t["to_buy"] = (t["to_stage"]=="๊ตฌ๋งค")
    # (7) phi display scale: compute when absent, repair non-finite entries
    kphi = _flow_scale(seg, mod, loy)
    if "flow_phi" not in t.columns:
        t["flow_phi"] = t["count"].astype(float) * kphi
    else:
        t["flow_phi"] = pd.to_numeric(t["flow_phi"], errors="coerce")
        miss = ~np.isfinite(t["flow_phi"])
        t.loc[miss, "flow_phi"] = t.loc[miss, "count"].astype(float) * kphi
    return t[cols].sort_values(["dist","from_stage","to_stage"]).reset_index(drop=True)
def _sankey_from_master_row(row: pd.Series, seg, mod, loy) -> pd.DataFrame:
    """Synthesize forward Sankey flows from one composite master row.

    Starting from the preference-stage sample size ``n``, stage volumes are
    chained with Bayesian dropout rates when available; otherwise each stage
    falls back to its standalone success rate. Returns one row per
    stage→stage link with count/dist/kind/to_buy/flow_phi plus the
    (segment, model, loyalty) keys, or an empty frame when n <= 0.

    Change: removed the unused inner helper ``P01`` (dead code — defined but
    never called; clipping to [0, 1] was never actually applied here).
    """
    n = _safe_int0(row.get("pref_sample_size"))
    if n <= 0:
        return pd.DataFrame(columns=[
            "from_stage","to_stage","count","dist","kind","to_buy","flow_phi",
            "segment","model","loyalty"
        ])
    def P(x):
        # accept fraction (0.45) or percent (45.5); values above 1.5 are percents
        v = _safe_num(x)
        if not np.isfinite(v): return np.nan
        return v/100.0 if v > 1.5 else v
    p_pref = P(row.get("pref_success_rate"))
    p_rec = P(row.get("rec_success_rate"))
    p_intent = P(row.get("intent_success_rate"))
    p_buy = P(row.get("buy_success_rate"))
    d1 = P(row.get("bayesian_dropout_pref_to_rec"))
    d2 = P(row.get("bayesian_dropout_rec_to_intent"))
    d3 = P(row.get("bayesian_dropout_intent_to_buy"))
    # chain volumes via dropout when both operands are finite; else fall back
    # to n * stage success rate (0 when that is also missing)
    pref = n * (p_pref if np.isfinite(p_pref) else 0.0)
    rec = pref * (1 - d1) if np.isfinite(pref) and np.isfinite(d1) else n * (p_rec if np.isfinite(p_rec) else 0.0)
    intent = rec * (1 - d2) if np.isfinite(rec) and np.isfinite(d2) else n * (p_intent if np.isfinite(p_intent) else 0.0)
    buy = intent*(1 - d3) if np.isfinite(intent) and np.isfinite(d3) else n * (p_buy if np.isfinite(p_buy) else 0.0)
    drop0 = max(0.0, float(n) - float(pref))  # users who never reach ์„ ํ˜ธ
    rows = [
        {"from_stage":"์ „์ฒด","to_stage":"๋ฏธ์„ ํ˜ธ", "count": drop0},
        {"from_stage":"์ „์ฒด","to_stage":"์„ ํ˜ธ", "count": pref},
        {"from_stage":"์„ ํ˜ธ","to_stage":"์ถ”์ฒœ", "count":max(0.0, rec)},
        {"from_stage":"์ถ”์ฒœ","to_stage":"๊ตฌ๋งค์˜ํ–ฅ", "count":max(0.0, intent)},
        {"from_stage":"๊ตฌ๋งค์˜ํ–ฅ","to_stage":"๊ตฌ๋งค", "count":max(0.0, buy)},
    ]
    g = pd.DataFrame(rows).dropna()
    g["count"] = pd.to_numeric(g["count"], errors="coerce").fillna(0)
    g = g[g["count"] > 0]
    g["dist"] = g["to_stage"].map(ORDER) - g["from_stage"].map(ORDER)
    g["kind"] = np.where(g["dist"]==1, "์ง์ ‘", "๊ฐ„์ ‘")
    g["to_buy"] = (g["to_stage"]=="๊ตฌ๋งค")
    kphi = _flow_scale(seg, mod, loy)
    g["flow_phi"] = g["count"].astype(float) * kphi
    g["segment"] = seg; g["model"] = mod; g["loyalty"] = loy
    return g[[
        "from_stage","to_stage","count","dist","kind","to_buy","flow_phi",
        "segment","model","loyalty"
    ]]
# Aggregation levels for which Sankey flows are precomputed:
# (level label, grouping key columns of the master frame)
LEVELS_FOR_SANKEY = [
    ("์ „์ฒด", []),
    ("์„ธ๊ทธ๋จผํŠธ", ["segment"]),
    ("๋ชจ๋ธ", ["model"]),
    ("์ถฉ์„ฑ๋„", ["loyalty"]),
    ("์„ธ๊ทธร—๋ชจ๋ธ", ["segment","model"]),
    ("์„ธ๊ทธร—์ถฉ์„ฑ๋„", ["segment","loyalty"]),
    ("๋ชจ๋ธร—์ถฉ์„ฑ๋„", ["model","loyalty"]),
    ("๋ชจ๋ธร—์„ธ๊ทธร—์ถฉ์„ฑ๋„", ["segment","model","loyalty"]),
]
def build_sankey_cache_from_master(df_master: pd.DataFrame,
                                   collapse_to_buy=True,
                                   collapse_from=("์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ")) -> pd.DataFrame:
    """Precompute Sankey flow rows for every level in LEVELS_FOR_SANKEY.

    For each level the master frame is grouped by that level's key columns,
    a composite row is derived per group (compose_composite_row), converted
    to flows via _sankey_from_master_row, and tagged with the level label.
    Optionally appends collapsed →๊ตฌ๋งค links per group. Returns one long
    frame; an empty frame with the full schema when nothing could be built.
    """
    dfm = _ensure_key_cols(df_master).copy()
    out = []
    for _lvl, keys in LEVELS_FOR_SANKEY:
        if not keys:
            # overall level: a single composite row over the whole frame
            seg, mod, loy = "ALL","ALL","ALL"
            row = compose_composite_row(dfm)
            if not row.empty:
                part = _sankey_from_master_row(row, seg, mod, loy)
                part["level"] = _lvl
                out.append(part)
            continue
        for vals, grp in dfm.groupby(keys, dropna=False):
            if not isinstance(vals, tuple): vals = (vals,)
            # map grouped values back into seg/mod/loy slots ("ALL" when unused)
            seg = vals[keys.index("segment")] if "segment" in keys else "ALL"
            mod = vals[keys.index("model")] if "model" in keys else "ALL"
            loy = vals[keys.index("loyalty")] if "loyalty" in keys else "ALL"
            row = compose_composite_row(grp)
            if row.empty:
                continue
            part = _sankey_from_master_row(row, seg, mod, loy)
            part["level"] = _lvl
            out.append(part)
    if not out:
        return pd.DataFrame(columns=[
            "from_stage","to_stage","count","dist","kind","to_buy","flow_phi",
            "segment","model","loyalty","level"
        ])
    full = pd.concat(out, ignore_index=True)
    if collapse_to_buy and not full.empty:
        # add collapsed →๊ตฌ๋งค links per (level, segment, model, loyalty) group
        full = (full.groupby(["level","segment","model","loyalty"], group_keys=False)
                .apply(lambda g: add_collapsed_to_buy(g, add_from=collapse_from))
                .reset_index(drop=True))
    return full
def build_sankey_flow_table(
    df_or_tbl: pd.DataFrame | None,
    seg="ALL", mod="ALL", loy="ALL",
    collapse_to_buy=True,
    collapse_from=("์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ")
):
    """Dispatch to the right Sankey-table builder.

    Input may be either an already-built flow table (recognized by
    from_stage/to_stage plus a count-like column) — sanitized and optionally
    collapsed into ๊ตฌ๋งค — or raw Sankey rows, which are routed through
    _sankey_build_table.
    """
    if df_or_tbl is None or df_or_tbl.empty:
        return pd.DataFrame(columns=["from_stage","to_stage","count","dist","kind","to_buy","flow_phi"])
    data = df_or_tbl.copy()
    # case-insensitive view of the column names
    col_keys = {str(c).strip().lower() for c in data.columns}
    has_stage_cols = ("from_stage" in col_keys) and ("to_stage" in col_keys)
    has_amount_col = bool(col_keys & {"count", "flow_phi", "bayesian_flow_count"})
    if has_stage_cols and has_amount_col:
        cleaned = _sanitize_sankey_table(
            data, seg=seg, mod=mod, loy=loy,
            enforce_single_level=True, drop_overall_if_mixed=True
        )
        if collapse_to_buy:
            cleaned = add_collapsed_to_buy(cleaned, add_from=collapse_from)
        return cleaned
    return _sankey_build_table(
        data, seg=seg, mod=mod, loy=loy,
        collapse_to_buy=collapse_to_buy, collapse_from=collapse_from
    )
def sankey_figure(
    df_sankey: pd.DataFrame | None,
    seg, mod, loy,
    normalize=False, base_stage="์ „์ฒด",
    drag=False, show_kind=True,
    table_override: pd.DataFrame | None = None,
):
    """Render the journey Sankey (all forward flows, skips included).

    ``table_override`` bypasses the raw-data path; legacy positional calls
    that pass a DataFrame in the ``normalize`` slot are also accepted.
    Direct (adjacent) and indirect (skip) links are colored differently when
    ``show_kind`` is True.

    Fix: when ``show_kind`` was False the link-color expression called
    ``.tolist()`` on a plain Python list, raising AttributeError; colors are
    now built per branch.
    """
    # 0) Legacy/mistake compatibility: a DataFrame passed in the `normalize`
    #    slot is treated as `table_override` (positional-call guard).
    if isinstance(normalize, pd.DataFrame) and table_override is None:
        table_override = normalize
        normalize = False  # the value was meaningless; restore a safe default
    # 1) Choose the table source
    if table_override is not None:
        # override may be raw — sanitize/backfill it defensively
        g = _sanitize_sankey_table(table_override, seg=seg, mod=mod, loy=loy)
    else:
        g = build_sankey_flow_table(df_sankey, seg=seg, mod=mod, loy=loy, collapse_to_buy=True)
    if g is None or g.empty:
        return _empty_fig("No Sankey data")
    # 2) Colors / node indices
    idx = {v: i for i, v in enumerate(STAGES)}
    STAGE_COLOR = {
        "์ „์ฒด": COL_STAGE_OVERALL,
        "๋ฏธ์„ ํ˜ธ": COL_STAGE_NONPREF,
        "์„ ํ˜ธ": COL_STAGE_PREF,
        "์ถ”์ฒœ": COL_STAGE_REC,
        "๊ตฌ๋งค์˜ํ–ฅ": COL_STAGE_INTENT,
        "๊ตฌ๋งค": COL_STAGE_BUY,
    }
    # In the Sankey only, the "์ „์ฒด" node is drawn black.
    STAGE_COLOR["์ „์ฒด"] = "#000000"
    node_colors = [STAGE_COLOR[s] for s in STAGES]
    # node x coordinates — 6 entries, one per stage
    xs = [0.00, 0.18, 0.34, 0.54, 0.74, 0.94]
    # FIX: build link colors as a list in both branches (previously the
    # show_kind=False branch crashed on list.tolist()).
    if show_kind:
        link_colors = np.where(
            g["kind"].astype(str) == "์ง์ ‘",
            hex_to_rgba(COL_LINK_DIRECT, 0.90),
            hex_to_rgba(COL_LINK_INDIRECT, 0.70),
        ).tolist()
    else:
        link_colors = [hex_to_rgba(COL_LINK_DIRECT, 0.85)] * len(g)
    # 3) Figure
    fig = go.Figure()
    fig.add_trace(go.Sankey(
        arrangement=("freeform" if drag else "fixed"),
        valueformat=",.1f", valuesuffix=" ฯ†",
        node=dict(
            pad=14, thickness=18, label=STAGES,
            x=xs, y=[0.50]*len(STAGES),
            color=node_colors, line=dict(color="#9aa0a6", width=0.7),
        ),
        link=dict(
            source=[idx[a] for a in g["from_stage"]],
            target=[idx[b] for b in g["to_stage"]],
            value=g["flow_phi"].astype(float).tolist(),
            color=link_colors,
            customdata=np.stack([
                g["kind"].astype(str).to_numpy(),
                g["dist"].astype(int).to_numpy(),
                g["count"].astype(float).to_numpy(),
            ], axis=-1),
            hovertemplate=(
                "%{customdata[0]} | %{source.label} โ†’ %{target.label}"
                "<br>์ ํ”„: %{customdata[1]}๋‹จ๊ณ„"
                "<br>์‹ค์ œ์œ ๋Ÿ‰: %{customdata[2]:,} (ํ‘œ์‹œ %{value:,.1f} ฯ†)"
                "<extra></extra>"
            ),
        ),
    ))
    if show_kind:
        # invisible scatter points serve as a legend for the two link kinds
        fig.add_trace(go.Scatter(x=[None], y=[None], mode="markers",
            marker=dict(size=10, color=hex_to_rgba(COL_LINK_DIRECT, 0.90)), name="์ง์ ‘(์ธ์ ‘)"))
        fig.add_trace(go.Scatter(x=[None], y=[None], mode="markers",
            marker=dict(size=10, color=hex_to_rgba(COL_LINK_INDIRECT, 0.70)), name="๊ฐ„์ ‘(์Šคํ‚ต)"))
    base = base_stage if base_stage in STAGES else "์ „์ฒด"
    tot_dir = float(g.loc[g["kind"]=="์ง์ ‘", "flow_phi"].sum())
    tot_ind = float(g.loc[g["kind"]=="๊ฐ„์ ‘", "flow_phi"].sum())
    fig.update_layout(
        title=f"Journey Sankey ยท ๋ชจ๋“  ์ˆœ๋ฐฉํ–ฅ(์Šคํ‚ต ํฌํ•จ) ยท ๊ธฐ์ค€={base}",
        height=390, showlegend=True,
        paper_bgcolor="#fff", plot_bgcolor="#fff",
        font=dict(color="#111"),
        margin=dict(l=10, r=10, t=32, b=64),
    )
    fig.add_annotation(
        x=0, y=-0.20, xref="paper", yref="paper",
        showarrow=False, align="left",
        text=f"์ง์ ‘ {tot_dir:,.1f} ฯ† ยท ๊ฐ„์ ‘ {tot_ind:,.1f} ฯ†",
        font=dict(size=11, color="#444")
    )
    fig = apply_dense_grid(fig)  # shared dense-grid styling
    # Sankey-specific: hide the cartesian axes entirely
    fig.update_xaxes(visible=False, showgrid=False, zeroline=False, fixedrange=True)
    fig.update_yaxes(visible=False, showgrid=False, zeroline=False, fixedrange=True)
    return fig
# ==== STAGE COLORS (overall -> pref -> rec -> intent -> buy) ====
# NOTE: re-assigns the COL_STAGE_* palette once more; these final values are
# the ones in effect at import time for the funnel-style figures below.
COL_STAGE_OVERALL = "#C32C2C"   # red
COL_STAGE_PREF = "#D24D3E"   # orange-red
COL_STAGE_REC = "#DE937A"   # salmon
COL_STAGE_INTENT = "#D49442"   # gold tone
COL_STAGE_BUY = "#2B8E81"   # green (typo previously fixed)
COL_STAGE_NONPREF = "#9CA3AF"   # non-preference (grey)
def matrix_funnel_figure(row, df_tm, seg, mod, loy, **kwargs):
    """
    Cumulative funnel figure.
    - Percent strings (e.g. '45.5%') and stray whitespace parse robustly.
    - Even when both drop and success rates are missing, at least two
      stages are always drawn.
    - Default height is 420 (a global FUNNEL_H, when defined, wins).
    """
    # --- Robust percent parser -------------------------------------------------
    def _p(x):
        if x is None:
            return np.nan
        if isinstance(x, str):
            s = x.strip()
            if not s:
                return np.nan
            if s.endswith("%"):
                try:
                    return float(s[:-1].strip()) / 100.0
                except Exception:
                    return np.nan
            try:
                return float(s)
            except Exception:
                return np.nan
        try:
            x = float(x)
        except Exception:
            return np.nan
        # values above 1.5 are treated as percents (23 => 0.23)
        return x / 100.0 if x > 1.5 else x
    def _clip01(v):
        # clamp to [0, 1]; NaN stays NaN
        return np.nan if not np.isfinite(v) else float(min(1.0, max(0.0, v)))
    # --- 1) drop rates / final conversion --------------------------------------
    d1_raw, d2_raw, d3_raw, full_raw = drops_from_anywhere(row, df_tm, seg, mod, loy)
    d1, d2, d3 = map(_clip01, map(_p, (d1_raw, d2_raw, d3_raw)))
    full_conv = _p(full_raw)
    # --- 2) per-stage success rates --------------------------------------------
    pref_sr = _p(row.get("pref_success_rate"))
    rec_sr = _p(row.get("rec_success_rate"))
    intent_sr = _p(row.get("intent_success_rate"))
    buy_sr = _p(row.get("buy_success_rate"))
    # --- 3) cumulative rates (drop-chain first, success-rate fallback) ---------
    overall = 1.0
    pref = pref_sr
    rec = pref * (1 - d1) if np.isfinite(pref) and np.isfinite(d1) else rec_sr
    intent = rec * (1 - d2) if np.isfinite(rec) and np.isfinite(d2) else intent_sr
    if np.isfinite(intent) and np.isfinite(d3):
        buy = intent * (1 - d3)
    elif np.isfinite(buy_sr):
        buy = buy_sr
    elif np.isfinite(full_conv):
        buy = full_conv
    else:
        buy = intent
    # enforce monotone decrease + clip everything to [0, 1]
    seq = [overall, _clip01(pref), _clip01(rec), _clip01(intent), _clip01(buy)]
    for i in range(1, len(seq)):
        if np.isfinite(seq[i]) and np.isfinite(seq[i-1]) and seq[i] > seq[i-1]:
            seq[i] = seq[i-1]
    overall, pref, rec, intent, buy = seq
    # --- 4) labels/values (always draw something) ------------------------------
    labels, values = ["์ „์ฒด"], [overall]
    if np.isfinite(pref): labels.append("์„ ํ˜ธ"); values.append(pref)
    if np.isfinite(rec): labels.append("์ถ”์ฒœ"); values.append(rec)
    if np.isfinite(intent): labels.append("๊ตฌ๋งค์˜ํ–ฅ"); values.append(intent)
    if np.isfinite(buy): labels.append("๊ตฌ๋งค"); values.append(buy)
    if len(labels) <= 1:
        # build at least two stages from the drop rates alone
        v = [1.0]
        if np.isfinite(d1): v.append(v[-1]*(1-d1))
        if np.isfinite(d2): v.append(v[-1]*(1-d2))
        if np.isfinite(d3): v.append(v[-1]*(1-d3))
        if len(v) == 1:
            est = _clip01(buy_sr if np.isfinite(buy_sr) else full_conv)
            v.append(0.0 if not np.isfinite(est) else est)
        names = ["์ „์ฒด","์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ","๊ตฌ๋งค"][:len(v)]
        labels, values = names, v
    # small segments get outside text for readability
    txtpos = ["inside" if v >= 0.07 else "outside" for v in values]
    color_map = {
        "์ „์ฒด": hex_to_rgba(COL_STAGE_OVERALL, 0.85),
        "์„ ํ˜ธ": hex_to_rgba(COL_STAGE_PREF, 0.85),
        "์ถ”์ฒœ": hex_to_rgba(COL_STAGE_REC, 0.85),
        "๊ตฌ๋งค์˜ํ–ฅ": hex_to_rgba(COL_STAGE_INTENT, 0.85),
        "๊ตฌ๋งค": hex_to_rgba(COL_STAGE_BUY, 0.85),
    }
    colors = [color_map.get(l, hex_to_rgba(COL_GRAY, 0.85)) for l in labels]
    fig = go.Figure(go.Funnel(
        y=labels,
        x=values,
        name="๋ˆ„์ ์œจ",
        customdata=values,
        textinfo="none",
        texttemplate="%{customdata:.1%}",
        textposition=txtpos,
        hovertemplate="%{label}: %{customdata:.1%}<extra></extra>",
        marker=dict(color=colors, line=dict(width=0.6, color="rgba(0,0,0,0.25)")),
        connector=dict(line=dict(color="rgba(0,0,0,0.25)", width=0.6)),
    ))
    # taller layout & trimmed margins
    fig.update_layout(
        title="Funnel (๋ˆ„์ ์œจ)",
        height=FUNNEL_H if 'FUNNEL_H' in globals() else 420,
        margin=dict(l=6, r=6, t=26, b=14),
        paper_bgcolor="#ffffff",
        plot_bgcolor="#ffffff",
    )
    fig.update_xaxes(dtick=_auto_dtick(1.0), tickformat=".0%")
    return apply_dense_grid(fig, x_prob=True)
def survival_curve_figure(row, df_tm, seg, mod, loy):
    """Plot the cumulative survival curve across journey stages.

    Each available drop rate multiplies the previous survival value; the
    per-step failure rate is annotated midway between consecutive points.
    """
    d1, d2, d3, _unused = drops_from_anywhere(row, df_tm, seg, mod, loy)
    survival = [1.0]
    for rate in (d1, d2, d3):
        if np.isfinite(rate):
            survival.append(survival[-1] * (1 - rate))
    if len(survival) == 1:
        return _empty_fig("No Survival data")
    stage_names = ["Start","์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ","๊ตฌ๋งค"][:len(survival)]
    positions = list(range(len(survival)))
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=positions, y=survival, mode="lines+markers",
        line=dict(width=3, color=COL_GRAY), marker=dict(color=COL_GREEN_LITE),
        hovertemplate="๋‹จ๊ณ„=%{text}<br>์ƒ์กด=%{y:.1%}<extra></extra>", text=stage_names, name="์ƒ์กดํ™•๋ฅ "
    ))
    # annotate the failure rate between each pair of consecutive points
    for step, rate in enumerate((d1, d2, d3), start=1):
        if step < len(survival) and np.isfinite(rate):
            midpoint = (survival[step-1] + survival[step]) / 2
            fig.add_annotation(x=step-0.5, y=midpoint,
                               text=f"์‹คํŒจ {rate:.1%}", showarrow=False,
                               font=dict(size=11, color=COL_ORANGE))
    fig.update_layout(height=320, title="์Šคํ…Œ์ด์ง€ ์ƒ์กด ์ปค๋ธŒ",
                      xaxis=dict(tickmode="array", tickvals=positions, ticktext=stage_names),
                      yaxis=dict(range=[0,1], tickformat=".1%"))
    return apply_dense_grid(fig, y_prob=True)
def waterfall_figure(row, df_tm, seg, mod, loy):
    """Drop-off waterfall: absolute per-step losses plus the final buy rate.

    Final-rate fallback order: explicit full-journey value → buy success
    rate → intent*(1-d3) → product of survival across all three drops →
    1 - sum of drops. The leading 'overall 100%' bar is intentionally omitted.
    """
    d1, d2, d3, full = drops_from_anywhere(row, df_tm, seg, mod, loy)
    def _as_prob(p):
        # fraction vs percent heuristic: values above 1.5 are percents
        p = _safe_num(p)
        if not np.isfinite(p): return np.nan
        return p/100.0 if p > 1.5 else p
    d1, d2, d3 = map(_as_prob, [d1, d2, d3])
    buy_sr = _as_prob(row.get("buy_success_rate"))
    intent = _as_prob(row.get("intent_success_rate"))
    full_in = _as_prob(full)
    # final purchase-rate fallback chain
    full = full_in
    if not np.isfinite(full):
        if np.isfinite(buy_sr): full = buy_sr
        elif np.isfinite(intent) and np.isfinite(d3): full = intent * (1.0 - d3)
        elif all(np.isfinite([d1, d2, d3])): full = (1.0 - d1) * (1.0 - d2) * (1.0 - d3)
    # absolute drops: conditional drops weighted by survival so far when all
    # three are known, otherwise each raw drop (missing → 0.0)
    if all(np.isfinite([d1, d2, d3])):
        drop1 = 1.0 * d1
        drop2 = (1.0 - d1) * d2
        drop3 = (1.0 - d1) * (1.0 - d2) * d3
    else:
        drop1 = d1 if np.isfinite(d1) else 0.0
        drop2 = d2 if np.isfinite(d2) else 0.0
        drop3 = d3 if np.isfinite(d3) else 0.0
    # if the final rate is still unknown, infer it from the drop total
    final_rate = float(full) if np.isfinite(full) else max(0.0, 1.0 - drop1 - drop2 - drop3)
    # NOTE(review): drop1..3 are always finite after the fallbacks above, so
    # this guard can never trigger as written — confirm whether it is intended
    # to test the raw inputs instead.
    if not any(np.isfinite(v) for v in [drop1, drop2, drop3]) and not np.isfinite(final_rate):
        return _empty_fig("No Waterfall data")
    def _fmt_drop(v):
        # tiny drops render as "-0.0%" rather than an empty label
        return "" if not np.isfinite(v) else (f"-{v:.1%}" if v >= 1e-6 else "-0.0%")
    # Variant without the leading 'overall 100%' bar
    measures = ["relative", "relative", "relative", "total"]
    x = ["์„ ํ˜ธโ†’์ถ”์ฒœ<br>Drop", "์ถ”์ฒœโ†’๊ตฌ๋งค์˜ํ–ฅ<br>Drop", "๊ตฌ๋งค์˜ํ–ฅโ†’๊ตฌ๋งค<br>Drop", "๊ตฌ๋งค์œจ"]
    y = [-drop1, -drop2, -drop3, final_rate]
    texts = [_fmt_drop(drop1), _fmt_drop(drop2), _fmt_drop(drop3), f"{final_rate:.1%}"]
    positions = ["inside", "inside", "inside", "outside"]
    fig = go.Figure(go.Waterfall(
        measure=measures, x=x, y=y,
        name="drop-off",
        text=texts, textposition=positions,
        insidetextfont=dict(color="white"),
        outsidetextfont=dict(color="#111"),
        decreasing={"marker":{"color": COL_GRAY_MED}},
        increasing={"marker":{"color": COL_GRAY_MED}},
        totals={"marker":{"color": COL_BLUE_DEEP}},
        connector={"line":{"color":"rgba(0,0,0,0.25)", "width":0.6}},
        cliponaxis=False, constraintext="both"
    ))
    fig.update_layout(
        height=320,
        title="๋“œ๋กญ์˜คํ”„ ์›Œํ„ฐํด",
        yaxis_tickformat=".1%",
        xaxis=dict(tickangle=0, automargin=True),
        margin=dict(l=8, r=8, t=30, b=14),  # slightly tighter side margins
        uniformtext_minsize=9, uniformtext_mode="hide",
    )
    # shared styling first...
    fig = apply_dense_grid(fig, y_prob=True)
    # ...then readability tweaks re-applied on top of it
    fig.update_layout(
        showlegend=False,  # hide the legend to free the header area
        bargap=0.15,       # narrower gaps -> thicker bars
        margin=dict(l=8, r=8, t=30, b=14),
    )
    fig.update_xaxes(automargin=True)
    return fig
def stacked_funnel_figure(row):
    """100% stacked funnel: success vs. failure share at each journey stage.

    Stages whose rate is missing/non-finite are dropped from the chart.
    Returns an empty-placeholder figure when no stage has a usable rate.
    """
    stage_cols = (
        ("์„ ํ˜ธ", "pref_success_rate"),
        ("์ถ”์ฒœ", "rec_success_rate"),
        ("๊ตฌ๋งค์˜ํ–ฅ", "intent_success_rate"),
        ("๊ตฌ๋งค", "buy_success_rate"),
    )
    labels, wins, losses = [], [], []
    for stage_name, col in stage_cols:
        rate = _safe_num(row.get(col))
        if not np.isfinite(rate):
            continue
        labels.append(stage_name)
        wins.append(rate)
        losses.append(1 - rate)
    if not wins:
        return _empty_fig("No Funnel data")
    fig = go.Figure()
    fig.add_bar(
        x=labels, y=wins, name="์„ฑ๊ณต",
        text=[f"{v:.1%}" for v in wins], textposition="inside",
        marker_color=COL_GREEN_LITE,
    )
    fig.add_bar(
        x=labels, y=losses, name="์‹คํŒจ",
        text=[f"{v:.1%}" for v in losses], textposition="inside",
        marker_color=COL_RED,
    )
    fig.update_layout(
        barmode="stack",
        yaxis=dict(range=[0, 1], tickformat=".1%"),
        height=320,
        title="100% ์Šคํƒ ํผ๋„ (์„ฑ๊ณต/์‹คํŒจ)",
    )
    return apply_dense_grid(fig, y_prob=True)
def forest_figure(df_scope: pd.DataFrame):
    """Forest plot (model comparison) with 95% confidence intervals.

    Pipeline: pick the most downstream success-rate column available,
    locate a sample-size column, normalize percent values to proportions,
    aggregate estimated successes per model, then compute a 95% CI —
    Jeffreys via scipy, statsmodels beta as first fallback, hand-rolled
    Wilson as last resort.
    """
    if df_scope is None or df_scope.empty:
        return _empty_fig("No Forest data")
    if not {"model", "segment"}.issubset(set(df_scope.columns)):
        return _empty_fig("Need 'model' and 'segment'")
    s = df_scope.copy()
    # ----- 1) Stage (success-rate) column priority: buy -> intent -> rec -> pref -> success_rate -> rate
    stage_order = [
        ("buy", "buy_success_rate"),
        ("intent", "intent_success_rate"),
        ("rec", "rec_success_rate"),
        ("pref", "pref_success_rate"),
        ("", "success_rate"),
        ("", "rate"),
    ]
    stage = ""
    rate_col = None
    for st, col in stage_order:
        if col in s.columns:
            stage, rate_col = st, col
            break
    if rate_col is None:
        return _empty_fig("No rate column")
    # ----- 2) Find a sample-size (n) column: stage-specific first, then generic fallbacks
    def _find_n_col(stage_name: str) -> str | None:
        # Returns the first candidate column present in `s`, else None.
        cands = []
        if stage_name:
            cands += [f"{stage_name}_sample_size", f"{stage_name}_n", f"{stage_name}_total"]
        cands += ["sample_size", "n", "N", "total", "count", "nobs", "๋ฒ ์ด์Šค์ˆ˜", "ํ‘œ๋ณธ์ˆ˜", "pref_sample_size"]
        for c in cands:
            if c in s.columns:
                return c
        return None
    n_col = _find_n_col(stage)
    if n_col is None:
        return _empty_fig("No sample size column")
    # ----- 3) Coerce to numeric and normalize the rate scale
    s[rate_col] = pd.to_numeric(s[rate_col], errors="coerce")
    s[n_col] = pd.to_numeric(s[n_col], errors="coerce")
    s = s.dropna(subset=[rate_col, n_col])
    if s.empty:
        return _empty_fig("No Forest values")
    r = np.where(s[rate_col] > 1.5, s[rate_col] / 100.0, s[rate_col])  # percent -> proportion
    r = np.clip(r, 0.0, 1.0)
    n = np.clip(s[n_col].to_numpy().astype(float), 0.0, np.inf)
    k = np.clip(np.round(r * n), 0.0, n)  # estimated success count per row
    # ----- 4) Aggregate per model (removes duplicate y-axis entries)
    agg = (pd.DataFrame({
        "model": s["model"].astype(str),
        "segment": s["segment"].astype(str),
        "k": k, "n": n
    })
    .groupby("model", as_index=False)
    .agg(k=("k", "sum"), n=("n", "sum"), seg=("segment", lambda x: x.iloc[0])))
    if agg.empty or not np.isfinite(agg["n"]).any():
        return _empty_fig("No Forest values")
    # ----- 5) 95% CI: Jeffreys (scipy) -> statsmodels beta -> Wilson fallback
    alpha = 0.05
    try:
        from scipy.stats import beta as _beta
        agg["p"] = (agg["k"] + 0.5) / (agg["n"] + 1.0)
        agg["lo"] = _beta.ppf(alpha/2, agg["k"] + 0.5, agg["n"] - agg["k"] + 0.5)
        agg["hi"] = _beta.ppf(1 - alpha/2, agg["k"] + 0.5, agg["n"] - agg["k"] + 0.5)
    except Exception:
        try:
            from statsmodels.stats.proportion import proportion_confint
            agg["p"] = (agg["k"] + 0.5) / (agg["n"] + 1.0)
            lo, hi = proportion_confint(agg["k"], agg["n"], alpha=alpha, method="beta")
            agg["lo"], agg["hi"] = lo, hi
        except Exception:
            # Wilson fallback (no external dependency)
            z = 1.959963984540054
            p = agg["k"] / agg["n"]
            denom = 1 + z*z/agg["n"]
            center = (p + z*z/(2*agg["n"])) / denom
            half = z*np.sqrt((p*(1-p) + z*z/(4*agg["n"])) / agg["n"]) / denom
            agg["p"] = p
            agg["lo"] = np.maximum(0.0, center - half)
            agg["hi"] = np.minimum(1.0, center + half)
    use = agg.sort_values("p").reset_index(drop=True)
    # ----- 6) Marker colors from each model's dominant segment
    dom_seg = _model_dominant_segment(df_scope)
    mapped_seg = use["model"].map(dom_seg).fillna(use["seg"])
    colors = mapped_seg.apply(_tier_color_for_segment).tolist()
    err_plus = (use["hi"] - use["p"]).to_numpy()
    err_minus = (use["p"] - use["lo"]).to_numpy()
    # ----- 7) Plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=use["p"].astype(float),
        y=use["model"].astype(str),
        mode="markers",
        name="๋ชจ๋ธ",  # explicit trace name (avoids the default "trace 0" label)
        hovertemplate="%{y}: %{x:.1%}<extra></extra>",
        marker=dict(size=10, color=colors, line=dict(color=COL_BLACK, width=1.6)),
    ))
    fig.update_traces(error_x=dict(
        type="data", symmetric=False,
        array=err_plus, arrayminus=err_minus,
        color=COL_BLACK, thickness=1.2, width=3
    ))
    add_vline_safe(fig, 0.5, line_dash="dot", line_color=COL_BLACK, opacity=0.4)
    fig.update_layout(
        height=320,
        title="ํฌ๋ ˆ์ŠคํŠธ ํ”Œ๋กฏ (๋ชจ๋ธ ๋น„๊ต) โ€” 95% CI",
        xaxis=dict(range=[0, 1], dtick=0.1, tickformat=".0%", title="์„ฑ๊ณต๋ฅ "),
        margin=dict(l=10, r=10, t=54, b=18),
        showlegend=False,
    )
    fig = apply_dense_grid(fig, x_prob=True)
    fig.update_layout(margin=dict(l=10, r=10, t=78, b=24))  # enlarge top margin
    fig.update_yaxes(domain=[0.12, 1.00])  # free top 12% so markers sit lower
    return fig
def compare_distribution_figure(df_master, seg, mod, loy, stage_label):
    """Horizontal bars comparing overall vs. selected mean lift on one axis.

    The comparison axis (model/segment/loyalty) is the first "ALL"-valued
    filter whose column has more than one distinct value; otherwise the
    axis with the most unique values is used as a fallback.
    """
    if df_master is None or df_master.empty:
        return _empty_fig("No Ranking data")
    seg = _as_all(seg); mod = _as_all(mod); loy = _as_all(loy)
    stage2lift = {
        "์„ ํ˜ธ": "pref_lift_vs_galaxy",
        "์ถ”์ฒœ": "rec_lift_vs_galaxy",
        "๊ตฌ๋งค์˜ํ–ฅ": "intent_lift_vs_galaxy",
        "๊ตฌ๋งค": "buy_lift_vs_galaxy",
    }
    lift_col = stage2lift.get(stage_label, "buy_lift_vs_galaxy")
    if lift_col not in df_master.columns:
        return _empty_fig("No lift column")
    # 1) Choose the comparison axis
    candidates = []
    if mod == "ALL": candidates.append("model")
    if seg == "ALL": candidates.append("segment")
    if loy == "ALL": candidates.append("loyalty")
    key = None
    for k in candidates:
        if k in df_master.columns and df_master[k].astype(str).nunique(dropna=True) > 1:
            key = k
            break
    if key is None:
        # fallback: the axis with the most unique values
        avail = [c for c in ["model", "segment", "loyalty"] if c in df_master.columns]
        if not avail:
            return _empty_fig("No grouping key")
        key = max(avail, key=lambda c: df_master[c].astype(str).nunique(dropna=True))
    # 2) Aggregate: overall population vs. the current filter selection
    overall = (df_master.groupby(key, as_index=False)
               .agg({lift_col: "mean"})
               .rename(columns={lift_col: "์ „์ฒด"}))
    scope = df_master.copy()
    if seg != "ALL": scope = scope[scope["segment"].astype(str) == seg]
    if mod != "ALL": scope = scope[scope["model"].astype(str) == mod]
    if loy != "ALL": scope = scope[scope["loyalty"].astype(str) == loy]
    if scope.empty:
        return _empty_fig("No values")
    selected = (scope.groupby(key, as_index=False)
                .agg({lift_col: "mean"})
                .rename(columns={lift_col: "์„ ํƒ"}))
    merged = pd.merge(overall, selected, on=key, how="outer")
    if merged.empty:
        return _empty_fig("No values")
    # 3) Clean-up: keys as strings; missing numeric cells become 0.0
    merged[key] = merged[key].astype(str)
    for col in ["์ „์ฒด", "์„ ํƒ"]:
        if col in merged.columns:
            merged[col] = pd.to_numeric(merged[col], errors="coerce")
    merged[["์ „์ฒด","์„ ํƒ"]] = merged[["์ „์ฒด","์„ ํƒ"]].fillna(0.0)
    # Sort ascending by selection; fall back to overall when all zeros
    if (merged["์„ ํƒ"] != 0).any():
        order = merged.sort_values("์„ ํƒ", ascending=True)[key].tolist()
    else:
        order = merged.sort_values("์ „์ฒด", ascending=True)[key].tolist()
    base = merged.set_index(key).loc[order]
    # 4) Bar colors: tier color per model, or value-driven scale otherwise
    vals_sel = base["์„ ํƒ"].to_numpy()
    if key == "model":
        dom_seg = _model_dominant_segment(df_master)
        bar_colors = [_tier_color_for_segment(dom_seg.get(k, "LowEnd")) for k in order]
    else:
        bar_colors = royg_color_for(vals_sel)
    # 5) Figure
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=base["์ „์ฒด"], y=order, orientation="h", name="์ „์ฒด",
        marker_color="rgba(150,150,150,0.35)"
    ))
    fig.add_trace(go.Bar(
        x=vals_sel, y=order, orientation="h", name="์„ ํƒ",
        marker=dict(color=bar_colors, line=dict(color=COL_GRAY, width=0.5)),
        text=[f"{v:+.1f}" for v in vals_sel], textposition="outside"
    ))
    add_vline_safe(fig, 0, line_dash="dot", line_color=COL_GRAY)
    fig.update_layout(
        barmode="group",
        title=f"{stage_label} Lift ({key})",
        height=320,
        margin=dict(l=10, r=10, t=54, b=18),
        paper_bgcolor="#ffffff", plot_bgcolor="#ffffff"
    )
    # Apply shared style first ...
    fig = apply_dense_grid(fig)
    # ... then override top margin/domain so bars don't crowd the title
    fig.update_layout(margin=dict(l=10, r=10, t=68, b=28))
    fig.update_yaxes(domain=[0.0, 0.86])  # free top 14% of the plot area
    # 4) return
    return fig
def bubble_figure(
    df_scope: pd.DataFrame,
    lift_col: str,
    snr_col: str,
    label_top_n: int = 4,
    label_inside: bool = False,
    textfont_size: int = 11
) -> go.Figure:
    """Lift-vs-SNR bubble chart; bubble size encodes sample size (sqrt scale).

    Parameters
    ----------
    df_scope : rows to plot (grouped by model when present, else segment).
    lift_col / snr_col : x- and y-value column names.
    label_top_n : label the N largest bubbles (None or -1 = all, <=0 = none).
    label_inside : place labels at bubble centers instead of above them.
    textfont_size : label font size in px.
    """
    # --- guards ---
    if df_scope is None or df_scope.empty:
        return _empty_fig("No Bubble data")
    if lift_col not in df_scope.columns or snr_col not in df_scope.columns:
        return _empty_fig("No Bubble data")
    s = df_scope.copy()
    s[lift_col] = pd.to_numeric(s[lift_col], errors="coerce")
    s[snr_col] = pd.to_numeric(s[snr_col], errors="coerce")
    # Missing sample sizes default to 1 so grouping/sizing still works.
    s["pref_sample_size"] = pd.to_numeric(
        s.get("pref_sample_size", pd.Series(1, index=s.index)),
        errors="coerce"
    ).fillna(1.0)
    key = "model" if ("model" in s.columns and s["model"].notna().any()) else (
        "segment" if "segment" in s.columns else None)
    if key is None:
        return _empty_fig("No Bubble key")
    need_cols = [key, lift_col, snr_col, "pref_sample_size"]
    if "segment" in s.columns and "segment" not in need_cols:
        need_cols.append("segment")
    use = s[need_cols].dropna(subset=[lift_col, snr_col])
    if use.empty:
        return _empty_fig("No Bubble values")
    # ---- aggregation (means per key, summed sample size) ----
    if "segment" in use.columns:
        grp = (use.groupby(key, as_index=False)
               .agg(x=(lift_col, "mean"),
                    y=(snr_col, "mean"),
                    n=("pref_sample_size", "sum"),
                    seg=("segment", "first")))
    else:
        grp = (use.groupby(key, as_index=False)
               .agg(x=(lift_col, "mean"),
                    y=(snr_col, "mean"),
                    n=("pref_sample_size", "sum")))
        grp["seg"] = np.nan
    # ---- colors: tier color of the model's dominant segment ----
    dom_seg = _model_dominant_segment(df_scope)
    def _color_for(row):
        # Falls back to the row's own segment (or key) when no dominant one.
        if key == "model":
            base_seg = dom_seg.get(str(row[key]), row["seg"])
        else:
            base_seg = row["seg"] if pd.notna(row["seg"]) else row[key]
        return _tier_color_for_segment(base_seg)
    grp["color"] = grp.apply(_color_for, axis=1)
    # ---- bubble size (sqrt scale mapped to 12..60px; 24px when uniform) ----
    n = grp["n"].astype(float).to_numpy()
    if np.isfinite(n).any():
        r = np.sqrt(np.maximum(n, 0))
        r0, r1 = float(np.nanmin(r)), float(np.nanmax(r))
        size = 24.0 if abs(r1 - r0) < 1e-9 else 12 + (r - r0)/(r1 - r0) * 48
    else:
        size = np.full(len(grp), 24.0)
    # ---- labels (only the label_top_n largest bubbles by n) ----
    labels_all = grp[key].astype(str).tolist()
    if label_top_n is None or label_top_n == -1:
        text = labels_all
    elif label_top_n <= 0:
        text = [""] * len(labels_all)
    else:
        top_idx = np.argsort(-grp["n"].to_numpy())[:label_top_n]
        show = set(top_idx.tolist())
        text = [labels_all[i] if i in show else "" for i in range(len(labels_all))]
    hovertext = grp[key].astype(str)
    # ===== win/lose split thresholds & shaded quadrants =====
    x_vals = grp["x"].astype(float).to_numpy()
    y_vals = grp["y"].astype(float).to_numpy()
    # Threshold at 0 (or SNR=2) when it falls inside the data range, else the median.
    x_thr = 0.0 if (np.nanmin(x_vals) < 0 < np.nanmax(x_vals)) else float(np.nanmedian(x_vals))
    y_thr = 2.0 if (np.nanmin(y_vals) <= 2.0 <= np.nanmax(y_vals)) else float(np.nanmedian(y_vals))
    x_min, x_max = float(np.nanmin(x_vals)), float(np.nanmax(x_vals))
    y_min, y_max = float(np.nanmin(y_vals)), float(np.nanmax(y_vals))
    x_pad = (x_max - x_min) * 0.03 if np.isfinite(x_max - x_min) else 0.0
    y_pad = (y_max - y_min) * 0.03 if np.isfinite(y_max - y_min) else 0.0
    x0, x1 = x_min - x_pad, x_max + x_pad
    y0, y1 = y_min - y_pad, y_max + y_pad
    winner_fill = hex_to_rgba("#FDE68A", 0.16)
    loser_fill = hex_to_rgba("#9CA3AF", 0.14)
    fig = go.Figure()
    add_vrect_safe(fig, x0, x_thr, y0=y_thr, y1=y1, fillcolor=loser_fill, layer="below")
    add_vrect_safe(fig, x_thr, x1, y0=y_thr, y1=y1, fillcolor=winner_fill, layer="below")
    add_vline_safe(fig, x_thr, line_dash="dot", line_color="#888", opacity=0.6)
    add_hline_safe(fig, y_thr, line_dash="dot", line_color="#888", opacity=0.6)
    fig.add_trace(go.Scatter(
        x=grp["x"], y=grp["y"],
        mode="markers+text",
        text=text,
        hovertext=hovertext,
        textposition=("middle center" if label_inside else "top center"),
        textfont=dict(size=textfont_size),
        cliponaxis=False,
        marker=dict(size=size, color=grp["color"], line=dict(color="#111", width=0.7)),
        customdata=grp["n"].astype(float),
        hovertemplate=(f"{key}=%{{hovertext}}<br>"
                       "Lift=%{x:.1f}<br>"
                       "SNR=%{y:.1f}<br>"
                       "ํ‘œ๋ณธ=%{customdata:,}<extra></extra>"),
        name="๋ชจ๋ธ/์„ธ๊ทธ"
    ))
    # base layout
    fig.update_layout(
        xaxis_title=None,
        yaxis_title="SNR",
        height=320,
        showlegend=False,
        paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=10, r=10, t=26, b=48)
    )
    fig.update_xaxes(title_standoff=18, automargin=True)
    fig.update_yaxes(title_standoff=8, automargin=True)
    # footnote legend in paper coordinates, below the axes
    foot_y = -0.20
    fig.add_annotation(xref="paper", yref="paper", x=0.00, y=foot_y,
        text="<b>โ– </b>", showarrow=False, font=dict(size=11, color="#FDE68A"))
    fig.add_annotation(xref="paper", yref="paper", x=0.035, y=foot_y,
        text="์Šน์ž ์˜์—ญ (Liftโ†‘, SNRโ†‘)", showarrow=False, font=dict(size=10, color="#555"), xanchor="left")
    fig.add_annotation(xref="paper", yref="paper", x=0.32, y=foot_y,
        text="<b>โ– </b>", showarrow=False, font=dict(size=11, color="#9CA3AF"))
    fig.add_annotation(xref="paper", yref="paper", x=0.355, y=foot_y,
        text="ํŒจ์ž ์˜์—ญ (Liftโ†“, SNRโ†‘)", showarrow=False, font=dict(size=10, color="#555"), xanchor="left")
    fig.add_annotation(xref="paper", yref="paper", x=0.67, y=foot_y,
        text="โ—‹ ์› ํฌ๊ธฐ = ํ‘œ๋ณธ์ˆ˜(โˆš์Šค์ผ€์ผ)", showarrow=False, font=dict(size=10, color="#666"), xanchor="left")
    # Apply shared style, then override margins/domain so content isn't
    # pushed against the top edge.
    fig = apply_dense_grid(fig)
    fig.update_layout(
        height=320,
        margin=dict(l=10, r=10, t=84, b=52),  # enlarged top margin
        title=dict(y=0.98, pad=dict(t=18, b=0))  # nudge title down
    )
    fig.update_yaxes(domain=[0.12, 1.00], automargin=True)  # shift plot area down
    return fig
def ppc_purchase_overlay_figure(row: pd.Series, m: int | None = None, draws: int = 6000) -> go.Figure:
    """Overlay of observed buy rate, Beta posterior, and posterior predictive.

    Parameters
    ----------
    row : metrics row providing buy_success_rate and sample sizes.
    m : predictive sample size; defaults to the observed n.
    draws : Monte Carlo draw count. NOTE: uses the global numpy RNG without
        a seed, so the histograms differ slightly on every call.
    """
    # Observed values
    n = _pick_sample_for_stage(row, "buy")
    if n <= 0:
        n = _safe_int0(row.get("pref_sample_size"))
    p_obs = _safe_num(row.get("buy_success_rate"))
    if not np.isfinite(p_obs):
        return _empty_fig("No PPC data")
    p_obs = float(np.clip(p_obs/100.0 if p_obs > 1.5 else p_obs, 0.0, 1.0))  # percent -> proportion
    k_obs = int(np.clip(round(p_obs * max(n, 1)), 0, max(n, 1)))
    if m is None:
        m = n
    # Posterior (Jeffreys prior: Beta(0.5, 0.5))
    a, b = k_obs + 0.5, (n - k_obs) + 0.5
    p = np.random.beta(a, b, size=draws)
    # Posterior predictive (proportion seen in m new observations)
    m = max(int(m), 1)
    k_pred = np.random.binomial(m, p)
    rate_pred = k_pred / m
    # 95% interval. NOTE(review): these are equal-tailed quantiles, not a
    # true highest-density interval despite the "HDI" wording elsewhere.
    lo, hi = np.quantile(p, [0.025, 0.975])
    fig = go.Figure()
    fig.add_histogram(
        x=p, nbinsx=60, histnorm="probability density",
        name="Posterior p", marker_color=hex_to_rgba("#9CA3AF", 0.45), opacity=0.55
    )
    fig.add_histogram(
        x=rate_pred, nbinsx=60, histnorm="probability density",
        name=f"PPC n={m:,}", marker_color=hex_to_rgba(COL_STAGE_BUY, 0.55), opacity=0.55
    )
    # Observed rate marker and 95% interval shading
    add_vline_safe(fig, p_obs, line_color="#111", line_width=2, opacity=0.9)
    fig.add_vrect(x0=lo, x1=hi, fillcolor=hex_to_rgba("#60A5FA", 0.18), line_width=0)
    # Push the legend below the plot area and keep it small
    fig.update_layout(
        barmode="overlay",
        title="PPC(๊ตฌ๋งค์œจ) โ€” Posterior & Posterior Predictive",
        height=320,
        margin=dict(l=10, r=10, t=30, b=64),  # extra bottom margin for legend
        showlegend=True,
        legend=dict(
            orientation="h",
            y=-0.22, yanchor="top",  # below the plot, outside the axes
            x=0.0, xanchor="left",
            font=dict(size=9),
            itemsizing="constant",
            itemwidth=30
        )
    )
    fig.update_xaxes(range=[0, 1], tickformat=".0%", title="๊ตฌ๋งค์œจ")
    fig.update_yaxes(title="๋ฐ€๋„")
    return apply_dense_grid(fig, x_prob=True)
# DataTable number formats: 1-decimal percentage / 1-decimal fixed
percent1 = FormatTemplate.percentage(1)
num1 = Format(precision=1, scheme=Scheme.fixed)
# Base card style. NOTE: redefined later in this file with a shadow + flex
# layout; this first version is what KPI_CARD_STYLE below inherits from.
CARD_STYLE = {
    "background": "white",
    "border": "none",  # borderless
    "borderRadius": "14px",
    "padding": "14px",
    "boxShadow": "none",  # no drop shadow (restore if desired)
}
# KPI-only card variant — light blue background
KPI_CARD_STYLE = {
    **CARD_STYLE,
    "background": "#EAF2FF",
    "border": "1px solid #d6e4ff"
}
ROW2_CARD_H = 360
ROW2_GRAPH_H = 320
# ------------- spacing knobs (tune in one place) -------------
ROW_GAP = "16px"  # gap between cards
PAGE_PAD = "24px 28px 24px"  # inner row padding
CARD_H = "430px"  # card (box) height
GRAPH_H = "390px"  # graph height inside a card (40px less than CARD_H)
KPI_GAP = "12px"  # gap between KPI cards
ROW1_COLS = "1fr 1fr 1fr"  # equal widths for the top 3 cards
ROW2_COLS = "1fr 1fr 1fr"  # equal widths for the bottom 3 cards
# ----------------- app.layout replacement -----------------
# ------------- spacing & sizing knobs -------------
TOP_CARD_H = "430px"  # top 3 card-box height
TOP_GRAPH_H = "390px"  # graph height inside the box (TOP_CARD_H - 40 for tabs/title)
ROW_CARD_H = "420px"  # lower-row card height
ROW_GRAPH_H = "380px"
PAGE_PAD = "24px 28px 24px"  # inner row padding
ROW_GAP = "16px"  # gap between cards
KPI_GAP = "12px"
# Shared card style: flex column so the graph fills the card.
# NOTE: this overrides the earlier CARD_STYLE; KPI_CARD_STYLE was built
# from the old one and is unaffected.
CARD_STYLE = {
    "background": "#fff",
    "borderRadius": "12px",
    "padding": "12px",
    "boxShadow": "0 1px 3px rgba(0,0,0,0.06)",
    "display": "flex",
    "flexDirection": "column",
}
# Normalize figure margins/legend/text so card interiors line up visually
def standardize_top_fig(fig):
    """Apply uniform margins, title placement and legend layout to *fig*."""
    fig.update_layout(
        margin=dict(l=28, r=16, t=36, b=28),
        title_x=0.02,
        title_pad=dict(t=4, b=4),
        uniformtext=dict(minsize=10, mode="hide"),
        legend=dict(orientation="h", x=0, y=-0.2),  # horizontal bottom legend -> equal card heights
    )
    # Charts that have axes get automargin
    for ax in ("xaxis", "yaxis"):
        if ax in fig.layout:
            fig.layout[ax].update(automargin=True, title_standoff=6)
    return fig
# NOTE: a byte-for-byte duplicate of the spacing/sizing knobs, CARD_STYLE and
# standardize_top_fig definitions above (plus a second copy of ROW_GAP /
# PAGE_PAD / CARD_H / GRAPH_H / KPI_GAP / ROW1_COLS / ROW2_COLS) was removed
# here. It was a copy-paste artifact; every value was identical, so removing
# it changes no behavior.
# Root layout: stores, toolbar, filters, KPI row, two chart rows, detail table.
app.layout = html.Div(
    [
        dcc.Store(id="store-master"),
        dcc.Store(id="store-tm"),
        dcc.Store(id="store-sankey"),
        dcc.Store(id="store-overall"),
        dcc.Store(id="store-mod-opts"),
        # Sankey drag toggle + interaction log line
        html.Div(
            [
                dcc.Checklist(
                    id="sankey-drag",
                    options=[{"label": " Sankey ๋“œ๋ž˜๊ทธ ํ—ˆ์šฉ", "value": "drag"}],
                    value=[],
                    inputStyle={"marginRight": "6px"},
                    style={"fontSize": "12px", "color": "#555"},
                ),
                html.Div(id="interact-msg", style={"marginTop": "6px","fontSize": "12px","color": "#444"}),
            ],
            style={"display":"flex","justifyContent":"space-between","alignItems":"center","padding":"0 16px 8px"},
        ),
        # Top bar: title + Excel path input + Load button (sticky)
        html.Div(
            [
                html.Div("Bayesian Journey Dashboard", style={"fontWeight":"700","fontSize":"18px"}),
                html.Div(
                    [
                        dcc.Input(id="excel-path", value=DEFAULT_PATH, placeholder="Excel ๊ฒฝ๋กœ",
                                  style={"width":"520px","marginRight":"8px"}),
                        html.Button("Load", id="load-btn", n_clicks=0, className="btn", style={"marginRight":"8px"}),
                    ],
                    style={"display":"flex","alignItems":"center"},
                ),
            ],
            style={"display":"flex","justifyContent":"space-between","alignItems":"center",
                   "padding":"12px 16px","borderBottom":"1px solid #eee","position":"sticky",
                   "top":"0","background":"#fafafa","zIndex":10},
        ),
        html.Div(id="status-msg", style={"padding":"8px 16px","color":"#555","fontSize":"12px"}),
        # Filter dropdowns: segment / model / loyalty
        html.Div(
            [
                html.Div([html.Label("Segment", style={"fontWeight":"600"}),
                          dcc.Dropdown(id="dd-seg", options=[], value="ALL", clearable=True)],
                         style={"flex":"1","minWidth":"220px","marginRight":"8px"}),
                html.Div([html.Label("Model", style={"fontWeight":"600"}),
                          dcc.Dropdown(id="dd-mod", options=[], value="ALL", clearable=True)],
                         style={"flex":"1","minWidth":"220px","marginRight":"8px"}),
                html.Div([html.Label("Loyalty", style={"fontWeight":"600"}),
                          dcc.Dropdown(id="dd-loy", options=[], value="ALL", clearable=True)],
                         style={"flex":"1","minWidth":"220px"}),
            ],
            style={"display":"flex","gap":"8px","padding":"12px 16px"},
        ),
        # KPI cards
        html.Div(
            [
                html.Div([html.Div("ํ‘œ๋ณธ ์ˆ˜", style={"color":"#888","fontSize":"12px"}),
                          html.H3(id="kpi-sample", style={"margin":"4px 0 0"})], style=KPI_CARD_STYLE),
                html.Div([html.Div("์ตœ์ข… ๊ตฌ๋งค์œจ (ฮ” ํฌํ•จ)", style={"color":"#888","fontSize":"12px"}),
                          html.H3(id="ins-final", style={"margin":"4px 0 0"})], style=KPI_CARD_STYLE),
                html.Div([html.Div("์ตœ๋Œ€ ๋“œ๋กญ", style={"color":"#888","fontSize":"12px"}),
                          html.H3(id="ins-drop", style={"margin":"4px 0 0","fontSize":"18px"})], style=KPI_CARD_STYLE),
                html.Div([html.Div("๋ถˆํ™•์‹ค์„ฑ (95% HDI ํญ)", style={"color":"#888","fontSize":"12px"}),
                          html.H3(id="ins-uncert", style={"margin":"4px 0 0"})], style=KPI_CARD_STYLE),
            ],
            style={
                "display":"grid",
                "gridTemplateColumns":"repeat(4, minmax(0,1fr))",
                "gap": KPI_GAP,
                "padding":"0 16px 12px"
            },
        ),
        # Hidden KPIs (kept for backward compatibility with older callbacks)
        html.Div([html.H3(id="kpi-buy-success"), html.H3(id="kpi-buy-fail")], style={"display":"none"}),
        # Row 1: Sankey + transition matrix + (waterfall/PPC tabs)
        html.Div(
            [
                html.Div(
                    dcc.Graph(
                        id="fig-sankey",
                        config=GRAPH_CONFIG | {"responsive": True},
                        style={"height": GRAPH_H, "width": "100%"}
                    ),
                    style={**CARD_STYLE, "height": CARD_H, "overflow": "hidden"}  # fixed height, clipped
                ),
                html.Div(
                    dcc.Graph(
                        id="fig-matrix",
                        config=GRAPH_CONFIG | {"responsive": True},
                        style={"height": GRAPH_H, "width": "100%"}
                    ),
                    style={**CARD_STYLE, "height": CARD_H, "overflow": "hidden"}
                ),
                html.Div(
                    [
                        dcc.Tabs(
                            id="tab-right", value="waterfall",
                            children=[
                                dcc.Tab(label="์›Œํ„ฐํด", value="waterfall"),
                                dcc.Tab(label="PPC(๊ตฌ๋งค์œจ)", value="ppc"),
                            ],
                            style={"marginBottom":"6px"},
                        ),
                        dcc.Graph(
                            id="fig-right",
                            config=GRAPH_CONFIG | {"responsive": True},
                            style={"height": GRAPH_H, "width": "100%"}
                        ),
                    ],
                    style={**CARD_STYLE, "height": CARD_H, "overflow": "hidden"},
                ),
            ],
            style={
                "display":"grid",
                "gridTemplateColumns": ROW1_COLS,
                "gap": ROW_GAP,
                "padding": PAGE_PAD,
                "marginBottom":"22px",
            },
        ),
        # Row 2: stage lift ranking + forest plot + bubble chart
        html.Div(
            [
                html.Div(
                    [
                        html.Div(
                            [
                                html.Span(
                                    "Stage",
                                    style={"fontSize": "12px", "color": "#666", "marginRight": "8px"},
                                ),
                                dcc.Dropdown(
                                    id="dd-stage-rank",
                                    options=[{"label": v, "value": v} for v in ["์„ ํ˜ธ", "์ถ”์ฒœ", "๊ตฌ๋งค์˜ํ–ฅ", "๊ตฌ๋งค"]],
                                    value="๊ตฌ๋งค",
                                    clearable=False,
                                    style={"width": "140px", "fontSize": "12px"},
                                ),
                            ],
                            style={
                                "display": "flex",
                                "justifyContent": "flex-end",
                                "alignItems": "center",
                                "marginBottom": "6px",
                            },
                        ),
                        dcc.Graph(
                            id="fig-stage-rank",
                            config={**GRAPH_CONFIG, "responsive": True},
                            style={"height": GRAPH_H, "width": "100%"},
                        ),
                    ],
                    style={**CARD_STYLE, "height": CARD_H, "overflow": "hidden"},
                ),
                html.Div(
                    dcc.Graph(
                        id="fig-forest",
                        config={**GRAPH_CONFIG, "responsive": True},
                        style={"height": GRAPH_H, "width": "100%"},
                    ),
                    style={**CARD_STYLE, "height": CARD_H, "overflow": "hidden"},
                ),
                html.Div(
                    dcc.Graph(
                        id="fig-bubble",
                        config={**GRAPH_CONFIG, "responsive": True},
                        style={"height": GRAPH_H, "width": "100%"},
                    ),
                    style={**CARD_STYLE, "height": CARD_H, "overflow": "hidden"},
                ),
            ],
            style={
                "display": "grid",
                "gridTemplateColumns": ROW2_COLS,
                "gap": ROW_GAP,
                "padding": PAGE_PAD,
                "marginTop": "4px",
            },
        ),
        # Hidden graphs (callback outputs kept alive but not displayed)
        html.Div(
            [
                dcc.Graph(id="fig-survival", config=GRAPH_CONFIG, style={"height": GRAPH_H}),
                dcc.Graph(id="fig-funnel", config=GRAPH_CONFIG, style={"height": GRAPH_H}),
            ],
            style={"display":"none"},
        ),
        # Detail metrics table
        html.Div(
            [
                html.H4("์ƒ์„ธ ๋ฉ”ํŠธ๋ฆญ", style={"margin":"0 0 8px 0"}),
                dash_table.DataTable(
                    id="metrics-table",
                    columns=[
                        {"name": "๋‹จ๊ณ„", "id": "๋‹จ๊ณ„"},
                        {"name": "๋ฒ ์ด์Šค์ˆ˜", "id": "๋ฒ ์ด์Šค์ˆ˜", "type": "numeric",
                         "format": Format(precision=0, scheme=Scheme.fixed)},
                        {"name": "์„ฑ๊ณตํ™•๋ฅ ", "id": "์„ฑ๊ณตํ™•๋ฅ ", "type": "numeric", "format": percent1},
                        {"name": "์‹คํŒจํ™•๋ฅ ", "id": "์‹คํŒจํ™•๋ฅ ", "type": "numeric", "format": percent1},
                        {"name": "ํ•˜ํ•œ", "id": "ํ•˜ํ•œ", "type": "numeric", "format": percent1},
                        {"name": "์ƒํ•œ", "id": "์ƒํ•œ", "type": "numeric", "format": percent1},
                        {"name": "ํŒ์ •", "id": "ํŒ์ •"},
                        {"name": "ํ‰๊ฐ€๋“ฑ๊ธ‰", "id": "ํ‰๊ฐ€๋“ฑ๊ธ‰"},
                        {"name": "SNR", "id": "SNR", "type": "numeric", "format": num1},
                        {"name": "Lift", "id": "Lift", "type": "numeric", "format": num1},
                        {"name": "rawํ‰๊ท ", "id": "rawํ‰๊ท ", "type": "numeric", "format": percent1},
                        {"name": "rawํ‘œ์ค€ํŽธ์ฐจ", "id": "rawํ‘œ์ค€ํŽธ์ฐจ", "type": "numeric", "format": percent1},
                    ],
                    data=[],
                    page_size=10,
                    style_table={"overflowX":"auto"},
                    style_cell={"fontFamily":"Noto Sans KR, Arial, sans-serif","fontSize":"12px","padding":"6px"},
                    style_header={"fontWeight":"bold"},
                    style_data_conditional=[
                        {"if": {"column_id": "๋ฒ ์ด์Šค์ˆ˜"}, "textAlign": "right"},
                        {"if": {"column_id": "์„ฑ๊ณตํ™•๋ฅ "}, "textAlign": "right"},
                        {"if": {"column_id": "์‹คํŒจํ™•๋ฅ "}, "textAlign": "right"},
                        {"if": {"column_id": "ํ•˜ํ•œ"}, "textAlign": "right"},
                        {"if": {"column_id": "์ƒํ•œ"}, "textAlign": "right"},
                        {"if": {"column_id": "SNR"}, "textAlign": "right"},
                        {"if": {"column_id": "Lift"}, "textAlign": "right"},
                        {"if": {"column_id": "rawํ‰๊ท "}, "textAlign": "right"},
                        {"if": {"column_id": "rawํ‘œ์ค€ํŽธ์ฐจ"}, "textAlign": "right"},
                        {"if": {"row_index": "odd"}, "backgroundColor": "#fafafa"},
                    ],
                ),
            ],
            style={**CARD_STYLE, "margin":"18px 16px 24px"},
        ),
    ],
    style={"background":"#f6f7fb","minHeight":"100vh"},
)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ app.layout ๊ต์ฒด ๋ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# ===================== ์ฝœ๋ฐฑ: Load =====================
@app.callback(
    Output("store-master","data"),
    Output("store-tm","data"),
    Output("store-sankey","data"),
    Output("store-overall","data"),
    Output("dd-seg","options"),
    Output("dd-seg","value"),
    Output("store-mod-opts","data"),
    Output("dd-loy","options"),
    Output("dd-loy","value"),
    Output("status-msg","children"),
    Input("load-btn","n_clicks"),
    State("excel-path","value"),
    prevent_initial_call=True
)
def on_load(n, path):
    """Load the Excel workbook, synthesize the Sankey cache, fill all stores.

    On failure, all stores are cleared and the error is reported in the
    status message (full traceback goes to stdout).
    """
    try:
        exists = os.path.exists(path)
        size = (os.path.getsize(path) if exists else 0)
        # 1) Load the workbook
        df_master, df_tm, df_sankey, overall, seg_opts, mod_opts_all, loy_opts, dbg = load_excel(path)
        # 2) Synthesize Sankey data for every filter combination from the master
        df_sankey_syn = build_sankey_cache_from_master(df_master, collapse_to_buy=True)
        # 3) Status message (includes the cache row count)
        status = (f"โœ… ๋กœ๋“œ ์™„๋ฃŒ | path={path} (exists={exists}, size={size:,} bytes) | "
                  f"engine={dbg.get('engine')} | sheets={dbg.get('sheets')} | matched={dbg.get('matched')} | "
                  f"sankey_cache={len(df_sankey_syn):,} rows")
        # 4) Return: the synthesized cache goes into the third output (store-sankey)
        return (
            df_master.to_json(date_format="iso", orient="split"),
            df_tm.to_json(date_format="iso", orient="split"),
            df_sankey_syn.to_json(date_format="iso", orient="split"),  # store-sankey
            json.dumps(overall),
            [{"label":v, "value":v} for v in seg_opts], "ALL",
            json.dumps(mod_opts_all),
            [{"label":v, "value":v} for v in loy_opts], "ALL",
            status
        )
    except Exception as e:
        err = f"โŒ LOAD ERROR: {type(e).__name__}: {e}"
        print("LOAD ERROR TRACE:\n", traceback.format_exc())
        return None, None, None, None, [], None, None, [], None, err
# ์„ธ๊ทธ๋จผํŠธ ๋ณ€๊ฒฝ ์‹œ ๋ชจ๋ธ ์˜ต์…˜ ์—…๋ฐ์ดํŠธ
@app.callback(
    Output("dd-mod","options"),
    Output("dd-mod","value"),
    Input("dd-seg","value"),
    State("store-master","data"),
    State("store-mod-opts","data"),
)
def on_seg_change(seg, js_master, js_allmods):
    """Refresh model dropdown options when the segment filter changes.

    Returns ("ALL" + models observed in the chosen segment), or the full
    cached model list when the segment is "ALL". The selected model value
    is always reset to "ALL".
    """
    if not js_master or not js_allmods:
        return [], None
    # Wrap in StringIO: pd.read_json on a literal JSON string is deprecated
    # (FutureWarning since pandas 2.1) and this matches how _read_df_store
    # deserializes the same store elsewhere in this file.
    df_master = pd.read_json(io.StringIO(js_master), orient="split")
    seg_val = _as_all(seg)
    if seg_val != "ALL":
        in_seg = df_master[df_master["segment"].astype(str) == seg_val]
        mods = ["ALL"] + sorted(
            str(v) for v in in_seg["model"].dropna().astype(str).unique().tolist()
            if str(v) != "ALL"
        )
    else:
        mods = json.loads(js_allmods)
    return [{"label": v, "value": v} for v in mods], "ALL"
@app.callback(
    Output("interact-msg","children"),
    Input("fig-sankey","clickData"),
    Input("fig-matrix","relayoutData"),
    Input("fig-right","relayoutData"),
    Input("fig-stage-rank","selectedData"),
    Input("fig-forest","selectedData"),
    Input("fig-bubble","selectedData"),
    prevent_initial_call=True
)
def on_interact(sankey_click, matrix_relayout, wf_relayout, rank_sel, forest_sel, bubble_sel):
    """Produce a one-line log message describing the latest graph interaction."""
    ctx = dash.callback_context
    if not ctx.triggered:
        return dash.no_update
    trigger = ctx.triggered[0]
    comp, prop = trigger["prop_id"].split(".")  # e.g. "fig-bubble.selectedData"
    payload = trigger["value"]
    if payload:
        if prop == "clickData":
            pt = (payload.get("points") or [{}])[0]
            label = pt.get("label") or f"{pt.get('sourceLabel','?')}โ†’{pt.get('targetLabel','?')}"
            return f"๐Ÿ–ฑ {comp}: {label} ํด๋ฆญ"
        if prop == "selectedData":
            n = len(payload.get("points", []))
            return f"๐Ÿ”Ž {comp}: {n}๊ฐœ ์„ ํƒ"
        if prop == "relayoutData":
            keys = ", ".join(list(payload.keys())[:3])
            return f"๐Ÿงญ {comp}: ๋ทฐ ๋ณ€๊ฒฝ({keys}...)"
    return dash.no_update
# update_all ์œ„์ชฝ(๊ฐ™์€ ํŒŒ์ผ)์— ์ถ”๊ฐ€
def _slice_sankey_cache_by_choice(df, seg, mod, loy):
if df is None or df.empty:
return pd.DataFrame()
sub = df.copy()
if "segment" in sub.columns and seg != "ALL":
sub = sub[(sub["segment"].astype(str) == seg) | sub["segment"].isna() | (sub["segment"].astype(str) == "ALL")]
if "model" in sub.columns and mod != "ALL":
sub = sub[(sub["model"].astype(str) == mod) | sub["model"].isna() | (sub["model"].astype(str) == "ALL")]
if "loyalty" in sub.columns and loy != "ALL":
sub = sub[(sub["loyalty"].astype(str) == loy) | sub["loyalty"].isna() | (sub["loyalty"].astype(str) == "ALL")]
if "level" in sub.columns:
for lv in LVL_PRIORITY:
cand = sub[sub["level"].astype(str) == lv]
if not cand.empty:
return cand.copy()
return sub
# ๋ ˆ๋ฒจ ์šฐ์„ ์ˆœ์œ„(๊ฐ€์žฅ ์„ธ๋ถ„ํ™”๋œ ๊ฒƒ๋ถ€ํ„ฐ)๋กœ ํ•˜๋‚˜๋งŒ ๋‚จ๊ธฐ๊ธฐ
if "level" in sub.columns:
for lv in LVL_PRIORITY:
cand = sub[sub["level"].astype(str) == lv]
if not cand.empty:
return cand.copy()
return sub
def _read_df_store(js):
if not js:
return pd.DataFrame()
# ์ด๋ฏธ dict/object๋กœ ๋“ค์–ด์˜ค๋ฉด ์‹œ๋„
if isinstance(js, dict):
if {"columns","data"}.issubset(js.keys()):
return pd.DataFrame(js["data"], columns=js["columns"])
try:
return pd.DataFrame(js)
except Exception:
return pd.DataFrame()
# ๋ฌธ์ž์—ด์ด๋ฉด ์šฐ์„  split โ†’ ์‹คํŒจ ์‹œ ์ผ๋ฐ˜ json ํ•ด์„
if isinstance(js, str):
try:
return pd.read_json(io.StringIO(js), orient="split")
except Exception:
try:
obj = json.loads(js)
if isinstance(obj, dict) and {"columns","data"}.issubset(obj.keys()):
return pd.DataFrame(obj["data"], columns=obj["columns"])
elif isinstance(obj, list):
return pd.DataFrame(obj)
elif isinstance(obj, dict):
# overall ๊ฐ™์€ dict๊ฐ€ ์˜ค๋ฉด DF๋กœ ๋งŒ๋“ค์ง€ ์•Š๊ณ  ๋นˆ DF ๋ฐ˜ํ™˜
return pd.DataFrame()
except Exception:
return pd.DataFrame()
return pd.DataFrame()
def _read_overall(js_overall):
if not js_overall:
return {}
if isinstance(js_overall, dict):
return js_overall
try:
return json.loads(js_overall)
except Exception:
return {}
# ===================== Callback: dashboard computation =====================
@app.callback(
    Output("kpi-sample","children"),
    Output("kpi-buy-success","children"),
    Output("kpi-buy-fail","children"),
    Output("ins-final","children"),
    Output("ins-drop","children"),
    Output("ins-uncert","children"),
    Output("metrics-table","data"),
    Output("fig-sankey","figure"),
    Output("fig-matrix","figure"),
    #Output("fig-simfan","figure"),
    Output("fig-bubble","figure"),
    Output("fig-stage-rank","figure"),
    Output("fig-survival","figure"),
    Output("fig-right","figure"),
    # Output("fig-waterfall","figure"),
    Output("fig-funnel","figure"),
    Output("fig-forest","figure"),
    Input("dd-seg","value"),
    Input("dd-mod","value"),
    Input("dd-loy","value"),
    Input("sankey-drag","value"),
    Input("dd-stage-rank","value"),
    Input("tab-right","value"),
    Input("store-master","data"),
    Input("store-tm","data"),
    Input("store-sankey","data"),
    Input("store-overall","data"),
)
def update_all(seg, mod, loy, drag_val, stage_label, tab_right,
               js_master, js_tm, js_sankey, js_overall=None):
    """Recompute every dashboard output for the current filter selection.

    Produces the KPI strings, three insight lines, the metrics table rows
    and eight figures — a 15-tuple in the exact order of the ``Output``
    declarations above. On any exception the traceback is printed and
    placeholder outputs are returned so the UI never crashes.
    """
    # Normalise dropdown values (None/empty -> "ALL").
    seg = _as_all(seg); mod = _as_all(mod); loy = _as_all(loy)
    if not isinstance(stage_label, str) or not stage_label:
        stage_label = "๊ตฌ๋งค"
    empty = _empty_fig("Load data first")
    # Guard: without master data, return the 15-slot placeholder template.
    if not js_master:
        return (
            "โ€“", "โ€“", "โ€“",          # kpi-sample, kpi-buy-success, kpi-buy-fail
            "โ€“", "โ€“", "โ€“",          # ins-final, ins-drop, ins-uncert
            [],                       # metrics-table.data
            empty, empty,             # fig-sankey, fig-matrix
            empty, empty,             # fig-bubble, fig-stage-rank
            empty, empty,             # fig-survival, fig-right
            empty,                    # fig-funnel
            empty                     # fig-forest
        )
    try:
        # 0) Auto-correct sankey/overall stores if their payloads were swapped.
        #    (Single call — the original invoked this both before and inside
        #    the try block; the redundant pre-try call is removed so any
        #    failure here is also caught by the handler below.)
        js_sankey, js_overall, _ = _maybe_swap_sankey_overall(js_sankey, js_overall)
        # 1) Safely deserialize the stores.
        df_master = _read_df_store(js_master)
        df_tm = _read_df_store(js_tm)
        df_sankey = _read_df_store(js_sankey)
        overall = _read_overall(js_overall)
        # 2) Pick the row for the current choice and scope the master table.
        row_pick = pick_row_for(df_master, seg, mod, loy)
        scope = df_master.copy()
        if seg != "ALL": scope = scope[scope["segment"].astype(str) == seg]
        if mod != "ALL": scope = scope[scope["model"].astype(str) == mod]
        if loy != "ALL": scope = scope[scope["loyalty"].astype(str) == loy]
        # Back-fill missing values in the picked row from the aggregated row.
        row_agg = compose_composite_row(scope)
        rowd = {k: row_pick[k] for k in row_pick.index}

        def _safe_num_or_nan(x):
            # Coerce to float; non-numeric or non-finite values become NaN.
            try:
                fx = float(x)
                return fx if np.isfinite(fx) else np.nan
            except Exception:
                return np.nan

        def coalesce_into(dst_dict, src_series, cols):
            # For each column, fill a NaN slot in dst_dict from src_series.
            for c in cols:
                va = _safe_num_or_nan(dst_dict.get(c))
                if np.isnan(va):
                    dst_dict[c] = (src_series.get(c) if isinstance(src_series, pd.Series) else np.nan)

        core_cols = [
            "pref_sample_size",
            "pref_success_rate","pref_ci_lower","pref_ci_upper",
            "rec_success_rate","rec_ci_lower","rec_ci_upper",
            "intent_success_rate","intent_ci_lower","intent_ci_upper",
            "buy_success_rate","buy_ci_lower","buy_ci_upper",
            "bayesian_dropout_pref_to_rec","bayesian_dropout_rec_to_intent","bayesian_dropout_intent_to_buy",
            "bayesian_full_conversion",
            "pref_snr","rec_snr","intent_snr","buy_snr",
            "pref_lift_vs_galaxy","rec_lift_vs_galaxy","intent_lift_vs_galaxy","buy_lift_vs_galaxy",
        ]
        coalesce_into(rowd, row_agg, core_cols)
        row = pd.Series(rowd)
        # 3) KPIs / metrics table.
        tbl = metrics_table_row(row)

        def _face(val, good, soso, reverse=False):
            # Map a score to a traffic-light emoji; `reverse` flips polarity
            # for lower-is-better metrics (dropout, HDI width).
            if not np.isfinite(val): return "โ”"
            v = (1 - val) if reverse else val
            return "๐ŸŸข" if v >= good else ("๐ŸŸก" if v >= soso else "๐Ÿ”ด")

        GOOD_P, SOSO_P = 0.55, 0.45
        GOOD_DROP, SOSO_DROP = 0.20, 0.35
        GOOD_W, SOSO_W = 0.08, 0.12
        sample = _safe_int0(row.get("pref_sample_size"))
        kpi_sample_text = f"๐Ÿ“Š {sample:,}"
        buy_p = _safe_num(row.get("buy_success_rate"))
        buy_s = (f"{buy_p:.1%}" if np.isfinite(buy_p) else "N/A")
        buy_f = (f"{(1-buy_p):.1%}" if np.isfinite(buy_p) else "N/A")
        overall_buy = _safe_num(overall.get("buy_mean"))
        delta = (buy_p - overall_buy) if (np.isfinite(buy_p) and np.isfinite(overall_buy)) else np.nan
        face_final = _face(buy_p, GOOD_P, SOSO_P, reverse=False)
        ins_final = (f"{face_final} ์„ฑ๊ณต {buy_s} / ์‹คํŒจ {buy_f} (vs ์ „์ฒด {delta:+.1%}p)"
                     if np.isfinite(delta) else f"{face_final} ์„ฑ๊ณต {buy_s} / ์‹คํŒจ {buy_f}")
        d1, d2, d3, _ = drops_from_anywhere(row, df_tm, seg, mod, loy)
        drops = [v for v in [d1, d2, d3] if np.isfinite(v)]
        dmax = max(drops) if drops else np.nan
        # NOTE(review): with reverse=True, _face compares (1 - dmax) against
        # GOOD_DROP/SOSO_DROP directly, which is green for almost any dropout
        # level — confirm whether thresholds were meant to be 1-GOOD_DROP etc.
        face_drop = _face(dmax, GOOD_DROP, SOSO_DROP, reverse=True)
        ins_drop = f"{face_drop} " + biggest_drop_text_by_sources(row, df_tm, seg, mod, loy)

        def _widest_hdi(r):
            # Return (stage_label, interval_width) for the stage with the
            # widest credible interval, i.e. the most uncertain estimate.
            pick = []
            for stage, lo_col, hi_col in [("์„ ํ˜ธ","pref_ci_lower","pref_ci_upper"),
                                          ("์ถ”์ฒœ","rec_ci_lower","rec_ci_upper"),
                                          ("๊ตฌ๋งค์˜ํ–ฅ","intent_ci_lower","intent_ci_upper"),
                                          ("๊ตฌ๋งค","buy_ci_lower","buy_ci_upper")]:
                lo = _safe_num(r.get(lo_col)); hi = _safe_num(r.get(hi_col))
                if np.isfinite(lo) and np.isfinite(hi):
                    pick.append((stage, max(0.0, hi - lo)))
            return max(pick, key=lambda x: x[1]) if pick else (None, np.nan)

        stage_w, width_w = _widest_hdi(row)
        face_unc = _face(width_w, GOOD_W, SOSO_W, reverse=True)
        ins_uncert = "๋ฐ์ดํ„ฐ ์—†์Œ" if stage_w is None else f"{face_unc} {stage_w} ๋‹จ๊ณ„ {width_w*100:.1f}%p"
        # 4) Sankey: normalized cache first; synthesize from the row if empty.
        g_for_sankey = build_sankey_flow_table(df_sankey, seg=seg, mod=mod, loy=loy, collapse_to_buy=True)
        if g_for_sankey is None or g_for_sankey.empty:
            # Cache produced nothing — build a flow table from the current row.
            g_for_sankey = _sankey_from_master_row(row, seg, mod, loy)
            g_for_sankey = add_collapsed_to_buy(g_for_sankey, add_from=("์„ ํ˜ธ","์ถ”์ฒœ","๊ตฌ๋งค์˜ํ–ฅ"))
        fig_sankey = sankey_figure(
            df_sankey=None,
            seg=seg, mod=mod, loy=loy,
            drag=("drag" in (drag_val or [])),
            table_override=g_for_sankey
        )
        # 5) Remaining figures.
        fig_matrix = matrix_funnel_figure(row, df_tm, seg, mod, loy)
        lift_col = "buy_lift_vs_galaxy" if "buy_lift_vs_galaxy" in scope.columns else "pref_lift_vs_galaxy"
        snr_col = "buy_snr" if "buy_snr" in scope.columns else "pref_snr"
        fig_bubble = bubble_figure(scope, lift_col, snr_col)
        fig_stage_rank = compare_distribution_figure(df_master, seg, mod, loy, stage_label)
        fig_survival = survival_curve_figure(row, df_tm, seg, mod, loy)
        fig_funnel = stacked_funnel_figure(row)
        fig_forest = forest_figure(scope)
        # Right panel shows either the PPC overlay or the waterfall chart.
        fig_right = (ppc_purchase_overlay_figure(row)
                     if (tab_right or "waterfall") == "ppc"
                     else waterfall_figure(row, df_tm, seg, mod, loy))
        # 6) Final 15-tuple, in the callback Output order.
        return (
            kpi_sample_text, buy_s, buy_f,      # kpi-sample, kpi-buy-success, kpi-buy-fail
            ins_final, ins_drop, ins_uncert,    # three insight lines
            tbl.to_dict("records"),             # metrics-table.data
            fig_sankey, fig_matrix,             # sankey, matrix
            fig_bubble, fig_stage_rank,         # bubble, stage-rank
            fig_survival, fig_right,            # survival, right panel (waterfall/ppc)
            fig_funnel,                         # funnel
            fig_forest                          # forest
        )
    except Exception:
        # Never let the callback raise into Dash: log and return placeholders.
        print("UPDATE ERROR:\n", traceback.format_exc())
        return (
            "โ€“","โ€“","โ€“","โ€“","โ€“","โ€“",
            [],
            empty, empty, empty, empty, empty, empty, empty, empty
        )
# ===================== Entrypoint =====================
if __name__ == "__main__":
    # Bind to $PORT (default 8059); on a port collision retry up to 5
    # consecutive ports instead of dying immediately.
    base_port = int(os.getenv("PORT", "8059"))
    last_err = None
    for offset in range(5):
        try:
            app.run_server(host="0.0.0.0", port=base_port + offset,
                           debug=False, use_reloader=False)
            break
        except (OSError, SystemExit) as e:
            # Werkzeug surfaces a taken port either as an OSError with
            # "Address already in use" or as SystemExit(1); try the next port.
            if "Address already in use" in str(e) or getattr(e, "code", None) == 1:
                last_err = e
                continue
            raise
    else:
        # Original code exhausted the loop silently; fail loudly instead.
        raise RuntimeError(
            f"Could not bind any port in range {base_port}-{base_port + 4}"
        ) from last_err