# Repository: Realrich / src/streamlit_app.py
# Latest commit: "Update src/streamlit_app.py" (f4df2c5, verified), by evanskim113
# =============================================================
# ⚽ XGBoost 3-Class 예측 + 유사 경기 분포 (Full Integration, Mobile-Fix)
# =============================================================
import streamlit as st
import pandas as pd
import numpy as np
import joblib
# ===============================
# Basic app setup + CSS/JS workaround for the mobile rendering bug
# ===============================
# One HTML snippet is injected right after the page is configured. It:
#   * forces `overflow: visible` on the main Streamlit containers so text and
#     metrics are not clipped,
#   * tightens the horizontal padding and styles the probability "pills" and
#     section headings on viewports up to 768px wide,
#   * registers a load handler that dispatches a synthetic `resize` event
#     after 150 ms.
# NOTE(review): the .prob-pill / .prob-section rules are inside the
# max-width: 768px media query, so they do not apply on wider screens.
# NOTE(review): confirm that st.markdown really executes the <script> tag in
# the deployed Streamlit version; otherwise the resize trigger is a no-op.
_MOBILE_FIX_HTML = """
<style>
.stApp, .block-container, [data-testid="stVerticalBlock"], [data-testid="column"] {
overflow: visible !important;
}
@media (max-width: 768px) {
.block-container {
padding-left: 1rem !important;
padding-right: 1rem !important;
}
.prob-pill {
font-size: 1.1rem;
color: #ffffff;
background: #262730;
padding: 6px 10px;
border-radius: 8px;
display: inline-block;
margin: 4px 6px 4px 0;
line-height: 1.2;
}
.prob-section h3 {
margin-top: 0.4rem;
margin-bottom: 0.4rem;
}
}
</style>
<script>
window.addEventListener('load', () => {
setTimeout(() => { window.dispatchEvent(new Event('resize')); }, 150);
});
</script>
"""

# set_page_config stays the first Streamlit call of the script.
st.set_page_config(layout="wide", page_title="⚽ XGBoost 예측 + 유사 경기 탐색기")
st.markdown(_MOBILE_FIX_HTML, unsafe_allow_html=True)
st.title("⚽ XGBoost 예측 + 유사 경기 분포 + 유사 경기 탐색기")
# Number of decimal places kept when two odds are compared for equality.
EQ_DECIMALS = 2


def eq(a, b, decimals=EQ_DECIMALS):
    """Compare two values after rounding both sides with ``np.round``.

    Args:
        a: Left operand; a scalar or anything array-like ``np.round`` accepts.
        b: Right operand; same kinds of input as ``a``.
        decimals: Decimal places kept before comparing. Defaults to
            ``EQ_DECIMALS``.

    Returns:
        The result of ``==`` on the rounded operands: a single boolean for
        scalars, an element-wise boolean array/Series for array-like input.
    """
    rounded_a = np.round(a, decimals)
    rounded_b = np.round(b, decimals)
    return rounded_a == rounded_b
# ===============================
# Feature list (65 features, base + handicap combined)
# ===============================
# Column order used when the feature frame is built. The models receive the
# frame as a plain array, so the position of every name matters.
# NOTE(review): presumably this is the training-time column order - confirm
# before reordering anything.
expected_cols_65 = [
    # Base market: raw-odds shares and dispersion
    "norm_win", "norm_draw", "norm_lose",
    "mean_odds", "std_odds", "cv_odds",
    # Base market: implied probabilities and summary measures
    "p_win", "p_draw", "p_lose",
    "overround", "entropy", "spread", "spread_draw",
    # Base market: ratios between prices / probabilities
    "odds_ratio_wd", "odds_ratio_wl", "odds_ratio_dl",
    "draw_prob_ratio", "draw_ratio",
    # Base market: gaps
    "draw_prob_gap", "fav_gap", "fav_draw_gap", "fav_diff", "draw_gap_mean",
    # Base market: ranks and normalised probabilities
    "rank_win", "rank_draw", "rank_lose",
    "p_win_norm", "p_draw_norm", "p_lose_norm",
    # Base market: price x normalised probability, draw-relative measures
    "ev_win", "ev_draw", "ev_lose",
    "draw_vs_avg", "draw_vs_max", "cv_spread", "cv_draw_gap",
    "draw_margin", "fav_ratio", "draw_skew", "log_spread",
    "draw_entropy_component", "dominance_score",
    # Handicap market
    "hmean_odds", "hstd_odds", "hcv_odds",
    "hentropy", "hspread", "hspread_draw",
    "hp_win", "hp_draw", "hp_lose",
    "hp_win_norm", "hp_draw_norm", "hp_lose_norm", "hoverround",
    # Base minus handicap differences
    "diff_win_prob", "diff_draw_prob", "diff_lose_prob", "diff_draw_odds",
    # Copies of base-market inputs and summaries
    "base_win_odds", "base_draw_odds", "base_lose_odds",
    "base_overround_ex", "base_entropy_ex", "base_spread_ex",
]
# ===============================
# Feature construction
# ===============================
def build_feature_dict(win, draw, lose, hwin, hdraw, hlose):
    """Derive every model feature from one match's six odds.

    Args:
        win: Base-market price for a home win.
        draw: Base-market price for a draw.
        lose: Base-market price for a home loss.
        hwin: Handicap-market price for a win.
        hdraw: Handicap-market price for a draw.
        hlose: Handicap-market price for a loss.

    Returns:
        dict mapping feature name to its value (65 entries). Values are a mix
        of Python floats and NumPy scalars.

    Note:
        No validation is done here. Every price is used as a divisor and as
        a logarithm argument, so callers should pass positive numbers.
    """
    base_odds = [win, draw, lose]
    hand_odds = [hwin, hdraw, hlose]
    feats = {}

    # --- Base market: each price as a share of the summed raw odds ---
    odds_sum = win + draw + lose
    feats['norm_win'] = win / odds_sum
    feats['norm_draw'] = draw / odds_sum
    feats['norm_lose'] = lose / odds_sum

    # --- Base market: mean / std / coefficient of variation ---
    mean_odds = np.mean(base_odds)
    std_odds = np.std(base_odds)
    feats['mean_odds'] = mean_odds
    feats['std_odds'] = std_odds
    feats['cv_odds'] = std_odds / mean_odds if mean_odds > 0 else 0

    # --- Base market: implied probabilities (1 / odds), raw and normalised ---
    p_win, p_draw, p_lose = 1 / win, 1 / draw, 1 / lose
    p_tot = p_win + p_draw + p_lose
    pn_win, pn_draw, pn_lose = p_win / p_tot, p_draw / p_tot, p_lose / p_tot
    feats.update(
        p_win=p_win, p_draw=p_draw, p_lose=p_lose,
        p_win_norm=pn_win, p_draw_norm=pn_draw, p_lose_norm=pn_lose,
        overround=p_tot,
    )
    entropy = -sum(x * np.log(x) for x in [pn_win, pn_draw, pn_lose])
    feats['entropy'] = entropy

    # --- Base market: spreads, ratios and gaps ---
    highest, lowest = max(base_odds), min(base_odds)
    spread = highest - lowest
    # Distance of the draw price from the midpoint of the two side prices.
    draw_mid_gap = abs(draw - (win + lose) / 2)
    feats['spread'] = spread
    feats['spread_draw'] = draw_mid_gap
    feats['odds_ratio_wd'] = win / draw
    feats['odds_ratio_wl'] = win / lose
    feats['odds_ratio_dl'] = draw / lose
    feats['draw_prob_ratio'] = p_draw / max(p_win, p_lose)
    side_low = min(win, lose)
    feats['draw_ratio'] = draw / side_low
    feats['draw_prob_gap'] = abs(p_draw - (p_win + p_lose) / 2)
    side_gap = abs(win - lose)
    fav_draw_gap = abs(draw - side_low)
    feats['fav_gap'] = side_gap
    feats['fav_draw_gap'] = fav_draw_gap
    feats['fav_diff'] = side_gap  # same value as fav_gap; both names are in the feature list
    feats['draw_gap_mean'] = abs(draw - mean_odds)

    # --- Base market: ranks (1 = lowest price, ties get the average rank) ---
    ranks = pd.Series(base_odds).rank().tolist()
    feats['rank_win'], feats['rank_draw'], feats['rank_lose'] = ranks

    # --- Base market: price x normalised probability and draw-relative measures ---
    feats['ev_win'] = win * pn_win
    feats['ev_draw'] = draw * pn_draw
    feats['ev_lose'] = lose * pn_lose
    feats['draw_vs_avg'] = draw / mean_odds
    feats['draw_vs_max'] = draw / highest
    feats['cv_spread'] = spread / mean_odds
    feats['cv_draw_gap'] = fav_draw_gap / mean_odds
    feats['draw_margin'] = draw_mid_gap  # same value as spread_draw
    feats['fav_ratio'] = side_low / max(win, lose)
    feats['draw_skew'] = (draw - win) - (lose - draw)
    feats['log_spread'] = np.log(highest) - np.log(lowest)
    feats['draw_entropy_component'] = -pn_draw * np.log(pn_draw)
    feats['dominance_score'] = max(pn_win, pn_lose) - pn_draw

    # --- Handicap market: same family of statistics, vectorised ---
    hmean = np.mean(hand_odds)
    hstd = np.std(hand_odds)
    feats['hmean_odds'] = hmean
    feats['hstd_odds'] = hstd
    feats['hcv_odds'] = hstd / hmean if hmean > 0 else 0
    hp = 1 / np.array(hand_odds)
    hp_norm = hp / hp.sum()
    feats['hp_win'], feats['hp_draw'], feats['hp_lose'] = hp
    feats['hp_win_norm'], feats['hp_draw_norm'], feats['hp_lose_norm'] = hp_norm
    feats['hoverround'] = hp.sum()
    feats['hentropy'] = -np.sum(hp_norm * np.log(hp_norm))
    feats['hspread'] = max(hand_odds) - min(hand_odds)
    feats['hspread_draw'] = abs(hdraw - (hwin + hlose) / 2)

    # --- Base minus handicap ---
    feats['diff_win_prob'] = pn_win - feats['hp_win_norm']
    feats['diff_draw_prob'] = pn_draw - feats['hp_draw_norm']
    feats['diff_lose_prob'] = pn_lose - feats['hp_lose_norm']
    feats['diff_draw_odds'] = hdraw - draw

    # --- Plain copies of base inputs / summaries under their "base_*" names ---
    feats.update(
        base_win_odds=win, base_draw_odds=draw, base_lose_odds=lose,
        base_overround_ex=p_tot, base_entropy_ex=entropy, base_spread_ex=spread,
    )
    return feats
def build_feature_frame(win, draw, lose, hwin, hdraw, hlose):
    """Build a one-row DataFrame of features in ``expected_cols_65`` order.

    Args:
        win, draw, lose: Base-market prices.
        hwin, hdraw, hlose: Handicap-market prices.

    Returns:
        A single-row ``pandas.DataFrame`` whose columns are exactly
        ``expected_cols_65``, in that order.

    Raises:
        KeyError: if ``build_feature_dict`` does not produce one of the
            expected columns.
    """
    features = build_feature_dict(win, draw, lose, hwin, hdraw, hlose)
    single_row = pd.DataFrame([features])
    return single_row.loc[:, expected_cols_65]
# ===============================
# Model loading (XGBoost)
# ===============================
@st.cache_resource
def load_models():
    """Load the two models and the handicap label encoder from disk.

    The function is wrapped in ``st.cache_resource``. File names are relative
    paths, resolved against the working directory.

    Returns:
        Tuple ``(base_model, handicap_model, handicap_label_encoder)``.

    Note:
        ``joblib.load`` unpickles the files, so only artifacts from a trusted
        source should be placed at these paths.
    """
    artifact_paths = (
        "xgb_model_wdl_softmax.pkl",
        "xgb_model_handicap_65f.pkl",
        "label_encoder_handicap.pkl",
    )
    # Loaded one after another, in the order listed above.
    base, hand, enc = (joblib.load(path) for path in artifact_paths)
    return base, hand, enc


model_base, model_hand, encoder_hand = load_models()
# ===============================
# Prediction
# ===============================
def predict_all(win, draw, lose, hwin, hdraw, hlose):
    """Run both models on one set of odds.

    Args:
        win, draw, lose: Base-market prices.
        hwin, hdraw, hlose: Handicap-market prices.

    Returns:
        Tuple of two dicts: base-market probabilities keyed by
        "승"/"무"/"패", and handicap probabilities keyed by
        "핸디 승"/"핸디 무"/"핸디 패".

    NOTE(review): labels are attached to the ``predict_proba`` columns purely
    by position. ``encoder_hand`` is not consulted here, so confirm that both
    models emit their classes in win/draw/lose order.
    """
    base_labels = ("승", "무", "패")
    hand_labels = ("핸디 승", "핸디 무", "핸디 패")

    # Both models receive the same feature row as a bare array.
    feature_matrix = build_feature_frame(win, draw, lose, hwin, hdraw, hlose).values
    base_row = model_base.predict_proba(feature_matrix)[0]
    hand_row = model_hand.predict_proba(feature_matrix)[0]

    base_result = {label: prob for label, prob in zip(base_labels, base_row)}
    hand_result = {label: prob for label, prob in zip(hand_labels, hand_row)}
    return base_result, hand_result
# ===============================
# Database loading
# ===============================
@st.cache_data
def load_db():
    """Read the historical-match workbook and make the odds columns numeric.

    The function is wrapped in ``st.cache_data``.

    Returns:
        ``pandas.DataFrame`` with the six odds columns converted through
        ``pd.to_numeric(errors="coerce")``, so unparsable cells become NaN.

    Raises:
        KeyError: if one of the six odds columns is missing from the sheet.
    """
    odds_columns = ["승", "무", "패", "핸디 승", "핸디 무", "핸디 패"]
    table = pd.read_excel("proto_core_65_fastsearch.xlsx", engine="openpyxl")
    numeric_odds = {
        name: pd.to_numeric(table[name], errors="coerce") for name in odds_columns
    }
    # assign() overwrites the existing columns in place, keeping column order.
    return table.assign(**numeric_odds)


df = load_db()
# ===============================
# Sidebar: odds input
# ===============================
# The six prices are entered as one "/"-separated string in the order
# win / draw / lose / handicap win / handicap draw / handicap lose.
st.sidebar.header("⚙️ 입력 배당")
odds_str = st.sidebar.text_input("배당 (승/무/패/핸승/핸무/핸패)", "2.05/3.35/3.45/3.65/3.75/1.90")
try:
    win, draw, lose, hwin, hdraw, hlose = map(float, odds_str.split("/"))
except ValueError:
    # ValueError covers both an unparsable number and a wrong number of
    # fields (tuple-unpacking mismatch). Anything else is a real bug and is
    # allowed to surface instead of being swallowed by a bare except.
    st.error("형식 오류! 예: 2.05/3.35/3.45/3.65/3.75/1.90")
    st.stop()

# float() happily accepts "0", "-1", "nan" and "inf". Every price is later
# used as a divisor and as a log argument, so reject those values here with a
# readable message rather than failing during feature construction.
if not all(np.isfinite(v) and v > 0 for v in (win, draw, lose, hwin, hdraw, hlose)):
    st.error("배당은 0보다 큰 숫자여야 합니다. 예: 2.05/3.35/3.45/3.65/3.75/1.90")
    st.stop()
# Render the similar-match explorer section first (keeps the render order
# stable on mobile).
st.header("🔍 유사 경기 탐색기 (정배당 일치 포함)")

base_cols = ["승", "무", "패"]
hand_cols = ["핸디 승", "핸디 무", "핸디 패"]

# --- Descriptors of the entered odds ---------------------------------
base_odds, hand_odds = [win, draw, lose], [hwin, hdraw, hlose]
# Position (0 = win, 1 = draw, 2 = lose) of the lowest price in each market.
base_min_idx_in = np.argmin(base_odds)
hand_min_idx_in = np.argmin(hand_odds)
base_min_val, hand_min_val = min(base_odds), min(hand_odds)
# "Reverse" price: the lose price when the win side is the lowest one,
# otherwise the win price.
base_rev_in = base_odds[2] if base_min_idx_in == 0 else base_odds[0]
hand_rev_in = hand_odds[2] if hand_min_idx_in == 0 else hand_odds[0]
base_rank_in = pd.Series(base_odds).rank().tolist()
hand_rank_in = pd.Series(hand_odds).rank().tolist()
draw_rank_in = base_rank_in[1]
hand_draw_rank_in = hand_rank_in[1]

# --- The same descriptors for every historical row -------------------
dfc = df.copy()
for col in base_cols + hand_cols:
    dfc[col] = pd.to_numeric(dfc[col], errors="coerce")
dfc["base_min_idx"] = dfc[base_cols].values.argmin(axis=1)
dfc["hand_min_idx"] = dfc[hand_cols].values.argmin(axis=1)
dfc["base_rev_val"] = np.where(dfc["base_min_idx"] == 0, dfc["패"], dfc["승"])
dfc["hand_rev_val"] = np.where(dfc["hand_min_idx"] == 0, dfc["핸디 패"], dfc["핸디 승"])
dfc["draw_rank"] = dfc[base_cols].rank(axis=1)["무"]
dfc["hand_draw_rank"] = dfc[hand_cols].rank(axis=1)["핸디 무"]
# Lowest price per row, kept as local Series so that no helper columns are
# added to the frame that becomes df_sim.
db_base_min = dfc[base_cols].min(axis=1)
db_hand_min = dfc[hand_cols].min(axis=1)


def _first_decimal(values):
    """Truncate to one decimal place (e.g. 2.37 -> 2.3)."""
    return np.floor(values * 10) / 10.0


# --- Optional, combinable match conditions ---------------------------
st.markdown("### 🔗 복합 조건 선택")
use_combo1 = st.checkbox("기본 정배당 + 무 + 기본 역배당")
use_combo2 = st.checkbox("핸디 정배당 + 핸무 + 핸디 역배당")
use_combo3 = st.checkbox("기본 역배당 + 무")
use_combo4 = st.checkbox("핸디 역배당 + 핸무")
use_combo5 = st.checkbox("무 + 핸무")
use_combo6 = st.checkbox("무 + 핸무 + 핸디 정배 소수 첫째자리 일치")
use_combo7 = st.checkbox("무 + 핸무 + 기본 정배 소수 첫째자리 일치")

same_draw = eq(dfc["무"], draw)
same_hdraw = eq(dfc["핸디 무"], hdraw)
same_base_rev = eq(dfc["base_rev_val"], base_rev_in)
same_hand_rev = eq(dfc["hand_rev_val"], hand_rev_in)

# Always required: same lowest-price position in both markets and the same
# rank of the draw price within each market.
mask = (dfc["base_min_idx"] == base_min_idx_in) & (dfc["hand_min_idx"] == hand_min_idx_in)
mask &= dfc["draw_rank"] == draw_rank_in
mask &= dfc["hand_draw_rank"] == hand_draw_rank_in

# Lowest-price equality goes through eq() like every other comparison, so
# both sides are rounded by np.round. Mixing np.round on the DB side with the
# builtin round() on the input side can disagree on binary-float edge cases.
if use_combo1:
    mask &= same_draw & same_base_rev & eq(db_base_min, base_min_val)
if use_combo2:
    mask &= same_hdraw & same_hand_rev & eq(db_hand_min, hand_min_val)
if use_combo3:
    mask &= same_draw & same_base_rev
if use_combo4:
    mask &= same_hdraw & same_hand_rev
if use_combo5:
    mask &= same_draw & same_hdraw
if use_combo6:
    mask &= same_draw & same_hdraw & (_first_decimal(db_hand_min) == _first_decimal(hand_min_val))
if use_combo7:
    mask &= same_draw & same_hdraw & (_first_decimal(db_base_min) == _first_decimal(base_min_val))

df_sim = dfc.loc[mask].copy().reset_index(drop=True)
# Show the matched rows and, when outcome columns exist, how the outcomes
# are distributed among them.
st.subheader("✅ 유사 경기 결과")
if not df_sim.empty:
    # Only the preferred columns that actually exist are displayed.
    cols_pref = ["일자","리그","홈팀","원정팀","승","무","패","핸디 승","핸디 무","핸디 패","결과","핸디결과"]
    available = df_sim.columns
    cols_show = [name for name in cols_pref if name in available]

    if "일자" in available:
        # Normalise dates to YYYY-MM-DD text; unparsable values are coerced
        # to NaT before formatting.
        parsed_dates = pd.to_datetime(df_sim["일자"], errors="coerce")
        df_sim["일자"] = parsed_dates.dt.strftime("%Y-%m-%d")
    st.dataframe(df_sim[cols_show], use_container_width=True)

    has_result = "결과" in df_sim.columns
    has_hand_result = "핸디결과" in df_sim.columns
    if has_result or has_hand_result:
        st.markdown("### 📊 결과 분포")
        if has_result:
            base_counts = df_sim["결과"].value_counts().to_dict()
            st.write("**기본 시장 결과 분포:**", base_counts)
        if has_hand_result:
            hand_counts = df_sim["핸디결과"].value_counts().to_dict()
            st.write("**핸디캡 시장 결과 분포:**", hand_counts)
else:
    st.warning("❌ 조건을 만족하는 유사 경기가 없습니다.")
st.divider()
# ===============================
# 🔢 Probability display (fully mobile-compatible)
# ===============================
base_probs, hand_probs = predict_all(win, draw, lose, hwin, hdraw, hlose)
mobile_text_mode = st.toggle("📱 모바일 강제 호환 모드(텍스트만)", value=True)


def _render_probabilities(heading, labels, probs, text_only):
    """Render one heading followed by three probabilities.

    Args:
        heading: Markdown heading line for the section.
        labels: Outcome labels, in display order; also the keys of ``probs``.
        probs: Mapping from label to probability (shown as a percentage).
        text_only: When true, emit one plain markdown line per outcome;
            otherwise emit a single HTML line of "prob-pill" spans.
    """
    st.markdown(heading)
    if text_only:
        for label, emoji in zip(labels, ["🟢","🟡","🔴"]):
            st.markdown(f"{emoji} **{label}** : {probs[label]*100:.2f}%")
        return
    pills = [
        f'<span class="prob-pill"><b>{label}</b>: {probs[label]*100:.2f}%</span>'
        for label in labels
    ]
    st.markdown("".join(pills), unsafe_allow_html=True)


# NOTE(review): the opening and closing <div> are sent in separate
# st.markdown calls - confirm that the rendered page really nests the
# elements in between inside this div.
st.markdown('<div class="prob-section">', unsafe_allow_html=True)
_render_probabilities("### ⚽ 기본 승/무/패 확률", ["승","무","패"], base_probs, mobile_text_mode)
_render_probabilities("### 🎯 핸디캡 승/무/패 확률", ["핸디 승","핸디 무","핸디 패"], hand_probs, mobile_text_mode)
st.markdown('</div>', unsafe_allow_html=True)
st.caption("ⓒ XGBoost 3-Class Softmax Models | 기본·핸디 65피처 통합형")