evanskim113 committed on
Commit
a6cb36e
·
verified ·
1 Parent(s): 3e2a517

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +273 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,275 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ # app_catboost_full.py
 
 
2
  import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ import joblib
6
+
7
+ # ===============================
8
+ # 앱 기본 설정
9
+ # ===============================
10
# Page chrome: browser-tab title with a wide layout, then the on-page heading.
st.set_page_config(
    page_title="⚽ CatBoost 예측 + 유사 경기 분포",
    layout="wide",
)
st.title("⚽ CatBoost 3-Class 예측 + 유사 경기 분포")
12
+
13
+ # ===============================
14
+ # 동등성 비교 정밀도 (소수 2째 자리)
15
+ # ===============================
16
EQ_DECIMALS = 2  # raise to 3 if a finer match is needed


def eq(a, b, decimals=EQ_DECIMALS):
    """Return whether ``a`` and ``b`` are equal after rounding.

    Both operands are passed through ``np.round`` with the same number of
    decimals and then compared with ``==``, so the result is element-wise
    when either operand is array-like (for example a pandas Series compared
    with a scalar).
    """
    lhs = np.round(a, decimals)
    rhs = np.round(b, decimals)
    return lhs == rhs
20
+
21
+ # ===============================
22
+ # Feature 목록
23
+ # - 기본 모델 입력: 59피처
24
+ # - 핸디 모델 입력: 65피처 (= 59 + 기본시장 보조 6)
25
+ # ===============================
26
# Column order is part of the model contract: the frames handed to the models
# are built by selecting these names in exactly this order.

# Features derived from the base win/draw/lose market (42 names).
_BASE_MARKET_COLS = [
    'norm_win', 'norm_draw', 'norm_lose',
    'mean_odds', 'std_odds', 'cv_odds',
    'p_win', 'p_draw', 'p_lose',
    'overround', 'entropy', 'spread', 'spread_draw',
    'odds_ratio_wd', 'odds_ratio_wl', 'odds_ratio_dl',
    'draw_prob_ratio', 'draw_ratio',
    'draw_prob_gap', 'fav_gap', 'fav_draw_gap', 'fav_diff', 'draw_gap_mean',
    'rank_win', 'rank_draw', 'rank_lose',
    'p_win_norm', 'p_draw_norm', 'p_lose_norm',
    'ev_win', 'ev_draw', 'ev_lose',
    'draw_vs_avg', 'draw_vs_max', 'cv_spread', 'cv_draw_gap',
    'draw_margin', 'fav_ratio', 'draw_skew', 'log_spread',
    'draw_entropy_component', 'dominance_score',
]

# Features derived from the handicap market (13 names).
_HANDICAP_MARKET_COLS = [
    'hmean_odds', 'hstd_odds', 'hcv_odds', 'hentropy', 'hspread', 'hspread_draw',
    'hp_win', 'hp_draw', 'hp_lose',
    'hp_win_norm', 'hp_draw_norm', 'hp_lose_norm',
    'hoverround',
]

# Differences between the two markets (4 names).
_CROSS_MARKET_COLS = [
    'diff_win_prob', 'diff_draw_prob', 'diff_lose_prob', 'diff_draw_odds',
]

# Input of the base model: 59 features.
expected_cols_base59 = _BASE_MARKET_COLS + _HANDICAP_MARKET_COLS + _CROSS_MARKET_COLS

# Input of the handicap model: the 59 above plus six base-market extras (65).
expected_cols_handicap65 = expected_cols_base59 + [
    'base_win_odds', 'base_draw_odds', 'base_lose_odds',
    'base_overround_ex', 'base_entropy_ex', 'base_spread_ex',
]
43
+
44
+ # ===============================
45
+ # Feature 생성
46
+ # ===============================
47
def build_feature_dict(win, draw, lose, hwin, hdraw, hlose):
    """Derive every model feature from the six odds of one match.

    The reciprocal of each odd is used as its implied probability; the
    ``*_norm`` variants are those reciprocals rescaled to sum to 1.

    Args:
        win, draw, lose: Odds of the base win/draw/lose market.
        hwin, hdraw, hlose: Odds of the handicap win/draw/lose market.

    Returns:
        dict: 65 entries keyed by feature name - the 59 features shared by
        both models plus the six ``base_*`` extras of the handicap model.

    Raises:
        ValueError: If any odd is not a positive finite number. Without this
            check a zero odd surfaces as a bare ``ZeroDivisionError`` (base
            market) or silently produces ``inf``/``nan`` features (handicap
            market, negative odds).
    """
    named_odds = (
        ("win", win), ("draw", draw), ("lose", lose),
        ("hwin", hwin), ("hdraw", hdraw), ("hlose", hlose),
    )
    for name, value in named_odds:
        if not (np.isfinite(value) and value > 0):
            raise ValueError(
                f"{name} must be a positive finite number, got {value!r}"
            )

    d = {}

    # --- base market ---
    denom = win + draw + lose
    d['norm_win'] = win / denom
    d['norm_draw'] = draw / denom
    d['norm_lose'] = lose / denom
    d['mean_odds'] = np.mean([win, draw, lose])
    d['std_odds'] = np.std([win, draw, lose])
    # The mean of validated positive odds is always > 0, so the ratio
    # features below need no zero guard.
    d['cv_odds'] = d['std_odds'] / d['mean_odds']
    d['p_win'], d['p_draw'], d['p_lose'] = 1 / win, 1 / draw, 1 / lose
    p_tot = d['p_win'] + d['p_draw'] + d['p_lose']
    d['p_win_norm'] = d['p_win'] / p_tot
    d['p_draw_norm'] = d['p_draw'] / p_tot
    d['p_lose_norm'] = d['p_lose'] / p_tot
    d['overround'] = p_tot
    d['entropy'] = -sum(
        x * np.log(x)
        for x in [d['p_win_norm'], d['p_draw_norm'], d['p_lose_norm']]
    )
    d['spread'] = max(win, draw, lose) - min(win, draw, lose)
    d['spread_draw'] = abs(draw - (win + lose) / 2)
    d['odds_ratio_wd'] = win / draw
    d['odds_ratio_wl'] = win / lose
    d['odds_ratio_dl'] = draw / lose
    d['draw_prob_ratio'] = d['p_draw'] / max(d['p_win'], d['p_lose'])
    d['draw_ratio'] = draw / min(win, lose)
    d['draw_prob_gap'] = abs(d['p_draw'] - (d['p_win'] + d['p_lose']) / 2)
    d['fav_gap'] = abs(win - lose)
    d['fav_draw_gap'] = abs(draw - min(win, lose))
    # fav_diff repeats fav_gap (and draw_margin repeats spread_draw below);
    # both names are in the expected column lists, so both are kept.
    d['fav_diff'] = abs(win - lose)
    d['draw_gap_mean'] = abs(draw - d['mean_odds'])
    # Series.rank() averages ranks on ties, so ranks may be fractional.
    d['rank_win'], d['rank_draw'], d['rank_lose'] = (
        pd.Series([win, draw, lose]).rank().tolist()
    )
    d['ev_win'] = win * d['p_win_norm']
    d['ev_draw'] = draw * d['p_draw_norm']
    d['ev_lose'] = lose * d['p_lose_norm']
    d['draw_vs_avg'] = draw / d['mean_odds']
    d['draw_vs_max'] = draw / max(win, draw, lose)
    d['cv_spread'] = d['spread'] / d['mean_odds']
    d['cv_draw_gap'] = d['fav_draw_gap'] / d['mean_odds']
    d['draw_margin'] = abs(draw - (win + lose) / 2)
    d['fav_ratio'] = min(win, lose) / max(win, lose)
    d['draw_skew'] = (draw - win) - (lose - draw)
    d['log_spread'] = np.log(max(win, draw, lose)) - np.log(min(win, draw, lose))
    d['draw_entropy_component'] = -d['p_draw_norm'] * np.log(d['p_draw_norm'])
    d['dominance_score'] = max(d['p_win_norm'], d['p_lose_norm']) - d['p_draw_norm']

    # --- handicap market ---
    d['hmean_odds'] = np.mean([hwin, hdraw, hlose])
    d['hstd_odds'] = np.std([hwin, hdraw, hlose])
    d['hcv_odds'] = d['hstd_odds'] / d['hmean_odds']
    p_h = 1 / np.array([hwin, hdraw, hlose])
    p_hn = p_h / p_h.sum()
    d['hp_win'], d['hp_draw'], d['hp_lose'] = p_h
    d['hp_win_norm'], d['hp_draw_norm'], d['hp_lose_norm'] = p_hn
    d['hoverround'] = p_h.sum()
    d['hentropy'] = -np.sum(p_hn * np.log(p_hn))
    d['hspread'] = max(hwin, hdraw, hlose) - min(hwin, hdraw, hlose)
    d['hspread_draw'] = abs(hdraw - (hwin + hlose) / 2)

    # --- cross-market differences ---
    d['diff_win_prob'] = d['p_win_norm'] - d['hp_win_norm']
    d['diff_draw_prob'] = d['p_draw_norm'] - d['hp_draw_norm']
    d['diff_lose_prob'] = d['p_lose_norm'] - d['hp_lose_norm']
    d['diff_draw_odds'] = hdraw - draw

    # --- base-market extras used only by the handicap model ---
    d['base_win_odds'] = win
    d['base_draw_odds'] = draw
    d['base_lose_odds'] = lose
    d['base_overround_ex'] = p_tot
    d['base_entropy_ex'] = d['entropy']
    d['base_spread_ex'] = d['spread']

    return d
113
+
114
def build_feature_frames(win, draw, lose, hwin, hdraw, hlose):
    """Build the one-row input frames for the base and handicap models.

    The features come from ``build_feature_dict``; each frame selects its
    columns in the order given by ``expected_cols_base59`` /
    ``expected_cols_handicap65``.

    Returns:
        tuple: ``(df_base, df_hand)``, each a single-row DataFrame.
    """
    features = build_feature_dict(win, draw, lose, hwin, hdraw, hlose)
    single_row = pd.DataFrame([features])
    return single_row[expected_cols_base59], single_row[expected_cols_handicap65]
120
+
121
+ # ===============================
122
+ # 모델 로드 (CatBoost 저장물)
123
+ # ===============================
124
@st.cache_resource
def load_models():
    """Load the two saved models and the handicap label encoder.

    The files are read with ``joblib.load`` from paths relative to the
    current working directory. ``joblib.load`` unpickles its input, so only
    trusted artifacts should be placed at these paths.

    Returns:
        tuple: ``(base_model, handicap_model, handicap_label_encoder)``.
    """
    artifact_paths = (
        "cat_model_wdl_softmax.pkl",         # base model (59 features)
        "cat_model_handicap_plus_base.pkl",  # handicap model (65 features)
        # The author recommends saving this encoder with the fixed class
        # order ["핸디 승", "핸디 무", "핸디 패"].
        "cat_label_encoder_handicap.pkl",
    )
    base, hand, enc = (joblib.load(path) for path in artifact_paths)
    return base, hand, enc


model_base, model_hand, encoder_hand = load_models()
132
+
133
+ # ===============================
134
+ # 예측 함수
135
+ # ===============================
136
def _class_label_order(model, fallback, encoder=None):
    """Return the display labels in the column order of ``predict_proba``.

    Tries ``model.classes_`` directly, then ``model.classes_`` decoded through
    ``encoder.inverse_transform``. A candidate is accepted only if it is a
    permutation of ``fallback``; otherwise ``fallback`` is returned unchanged.
    """
    classes = getattr(model, "classes_", None)
    if classes is None:
        return list(fallback)
    classes = list(classes)
    if len(classes) != len(fallback):
        return list(fallback)

    candidates = [classes]
    if encoder is not None:
        try:
            candidates.append(list(encoder.inverse_transform(classes)))
        except (ValueError, TypeError, IndexError, AttributeError):
            # The encoder cannot decode these class values (for example they
            # are already strings); the remaining candidates still apply.
            pass

    for candidate in candidates:
        names = [str(c) for c in candidate]
        if sorted(names) == sorted(fallback):
            return names
    return list(fallback)


def predict_all(win, draw, lose, hwin, hdraw, hlose):
    """Predict outcome probabilities for the base and handicap markets.

    Builds both feature frames, runs ``predict_proba`` on ``model_base`` and
    ``model_hand``, and labels the resulting probability columns.

    The label for each column is taken from the model's own ``classes_``
    (decoded through ``encoder_hand`` for the handicap model when needed)
    rather than assumed: a fitted scikit-learn ``LabelEncoder`` stores its
    classes sorted, which for these labels is not the display order.
    When the class order cannot be resolved, the fixed display order is
    assumed, as before.
    NOTE(review): this presumes the models were trained on labels matching
    ``classes_`` / the saved encoder - confirm against the training script.

    Returns:
        tuple: ``(base_probs, hand_probs)``, two dicts keyed by
        "승"/"무"/"패" and "핸디 승"/"핸디 무"/"핸디 패" in that display order.
    """
    df_input_base, df_input_hand = build_feature_frames(win, draw, lose, hwin, hdraw, hlose)
    # The models accept the DataFrames directly.
    probs_base = model_base.predict_proba(df_input_base)[0]
    probs_hand = model_hand.predict_proba(df_input_hand)[0]

    # Fixed on-screen order.
    base_labels = ["승", "무", "패"]
    hand_labels = ["핸디 승", "핸디 무", "핸디 패"]

    base_by_label = dict(zip(_class_label_order(model_base, base_labels), probs_base))
    hand_by_label = dict(
        zip(_class_label_order(model_hand, hand_labels, encoder_hand), probs_hand)
    )
    return (
        {k: base_by_label[k] for k in base_labels if k in base_by_label},
        {k: hand_by_label[k] for k in hand_labels if k in hand_by_label},
    )
149
+
150
+ # ===============================
151
+ # 데이터 로드 (유사 경기 분포용)
152
+ # ===============================
153
@st.cache_data
def load_db():
    """Read the historical match table used for the similar-match distributions.

    The six odds columns are converted with ``pd.to_numeric``; values that
    cannot be parsed become NaN (``errors="coerce"``).

    Returns:
        pandas.DataFrame: contents of ``proto_core_65_fastsearch.xlsx``.
    """
    frame = pd.read_excel("proto_core_65_fastsearch.xlsx", engine="openpyxl")
    odds_columns = ["승", "무", "패", "핸디 승", "핸디 무", "핸디 패"]
    frame[odds_columns] = frame[odds_columns].apply(pd.to_numeric, errors="coerce")
    return frame


DB = load_db()
162
+
163
+ # ===============================
164
+ # 사이드바 입력
165
+ # ===============================
166
st.sidebar.header("⚙️ 입력 배당")
default_odds = "2.05/3.35/3.45/3.65/3.75/1.90"
odds_str = st.sidebar.text_input(
    "배당 입력 (승/무/패/핸승/핸무/핸패)",
    value=default_odds,
    help="예: 2.05/3.35/3.45/3.65/3.75/1.90",
)

# Parse six slash-separated odds. Only ValueError is expected from float(),
# so nothing broader is caught.
try:
    parsed_odds = [float(token) for token in odds_str.split("/")]
except ValueError:
    parsed_odds = []

# Reject a wrong count and any non-positive or non-finite value: those would
# otherwise pass parsing and then crash or corrupt the feature computation
# (division by zero, log of a non-positive number).
odds_ok = len(parsed_odds) == 6 and all(
    np.isfinite(value) and value > 0 for value in parsed_odds
)
if not odds_ok:
    st.error("형식 오류! 예: 2.05/3.35/3.45/3.65/3.75/1.90")
    st.stop()

base_win, base_draw, base_lose, hand_win, hand_draw, hand_lose = parsed_odds
176
+
177
+ # ===============================
178
+ # 1) 예측 결과
179
+ # ===============================
180
base_probs, hand_probs = predict_all(
    base_win, base_draw, base_lose,
    hand_win, hand_draw, hand_lose,
)

st.subheader("✅ CatBoost 예측 결과")
c1, c2 = st.columns(2)

# Left panel: base-market probabilities, one metric per outcome.
with c1:
    st.write("### ⚽ 기본 승/무/패 확률")
    for slot, label in zip(st.columns(3), ["승", "무", "패"]):
        slot.metric(label, f"{base_probs[label]*100:.2f}%")

# Right panel: handicap probabilities, always shown win -> draw -> lose.
with c2:
    st.write("### 🎯 핸디캡 승/무/패 확률")
    for slot, label in zip(st.columns(3), ["핸디 승", "핸디 무", "핸디 패"]):
        slot.metric(label, f"{hand_probs[label]*100:.2f}%")

st.markdown("---")
197
+
198
+ # ===============================
199
+ # 공통: 입력 정배 라벨
200
+ # ===============================
201
# Label of the lowest odd in each market (first one wins on ties); the
# filters below compare it with the per-row idxmin of the database.
_base_odds = [base_win, base_draw, base_lose]
_hand_odds = [hand_win, hand_draw, hand_lose]
base_min_label = ["승", "무", "패"][np.argmin(_base_odds)]
hand_min_label = ["핸디 승", "핸디 무", "핸디 패"][np.argmin(_hand_odds)]
203
+
204
+ # ===============================
205
+ # 2) 기본 승무패 결과 분포
206
+ # - 정배 방향 일치 + (승/무/패) 완전 동일
207
+ # ===============================
208
st.subheader("① 기본 승무패 결과 분포 (정배 방향 일치 + 배당 완전 동일)")

# Rows whose lowest base odd is the same outcome as the input...
_base_same_favourite = DB[["승", "무", "패"]].idxmin(axis=1) == base_min_label
# ...and whose three base odds equal the input after rounding.
_base_same_odds = eq(DB["승"], base_win) & eq(DB["무"], base_draw) & eq(DB["패"], base_lose)
mask_base = _base_same_favourite & _base_same_odds
subset_base = DB.loc[mask_base].copy()

if "결과" in subset_base.columns and not subset_base.empty:
    st.write(f"표본 크기: {subset_base.shape[0]} 경기")
    # Rows stay in value_counts() order (most frequent first).
    base_counts = subset_base["결과"].value_counts()
    st.dataframe(base_counts.rename_axis("결과").to_frame("경기 수"))
else:
    st.info("조건에 맞는 표본이 없습니다.")
222
+
223
+ # ===============================
224
+ # 3) 핸디캡 승무패 결과 분포
225
+ # - 정배 방향 일치 + (핸승/핸무/핸패) 완전 동일
226
+ # - 표시는 '핸디 승 → 핸디 무 → 핸디 패' 순서로 고정
227
+ # ===============================
228
st.subheader("② 핸디캡 승무패 결과 분포 (정배 방향 일치 + 배당 완전 동일)")

# Rows whose lowest handicap odd is the same outcome as the input...
_hand_same_favourite = (
    DB[["핸디 승", "핸디 무", "핸디 패"]].idxmin(axis=1) == hand_min_label
)
# ...and whose three handicap odds equal the input after rounding.
_hand_same_odds = (
    eq(DB["핸디 승"], hand_win)
    & eq(DB["핸디 무"], hand_draw)
    & eq(DB["핸디 패"], hand_lose)
)
mask_hand = _hand_same_favourite & _hand_same_odds
subset_hand = DB.loc[mask_hand].copy()

if "핸디결과" in subset_hand.columns and not subset_hand.empty:
    st.write(f"표본 크기: {subset_hand.shape[0]} 경기")
    order = ["핸디 승", "핸디 무", "핸디 패"]  # fixed display order
    h_counts = subset_hand["핸디결과"].value_counts()
    # reindex() fixes the row order; outcomes with no rows become NaN and
    # are dropped before casting the counts back to int.
    h_counts = h_counts.reindex(order).dropna().astype(int)
    st.dataframe(h_counts.rename_axis("핸디결과").to_frame("경기 수"))
else:
    st.info("조건에 맞는 표본이 없습니다.")
243
+
244
+ # ===============================
245
+ # 4) 무 = 입력 무 & 핸무 = 입력 핸무 + 정배(기본/핸디) 둘 다 일치
246
+ # ===============================
247
st.subheader("③ 무 = 입력 무 & 핸무 = 입력 핸무 (정배 방향 모두 일치)")

# Same draw odd in both markets, and the same lowest-odd outcome in both.
_same_draw_odds = eq(DB["무"], base_draw) & eq(DB["핸디 무"], hand_draw)
_same_base_favourite = DB[["승", "무", "패"]].idxmin(axis=1) == base_min_label
_same_hand_favourite = (
    DB[["핸디 승", "핸디 무", "핸디 패"]].idxmin(axis=1) == hand_min_label
)
mask_combo = _same_draw_odds & _same_base_favourite & _same_hand_favourite
subset_combo = DB.loc[mask_combo].copy()

if not subset_combo.empty:
    st.write(f"표본 크기: {subset_combo.shape[0]} 경기")
    c3a, c3b = st.columns(2)
    if "결과" in subset_combo.columns:
        with c3a:
            st.write("— 기본 결과 분포")
            combo_base_counts = subset_combo["결과"].value_counts()
            st.dataframe(combo_base_counts.rename_axis("결과").to_frame("경기 수"))
    if "핸디결과" in subset_combo.columns:
        with c3b:
            st.write("— 핸디 결과 분포")
            order = ["핸디 승", "핸디 무", "핸디 패"]
            hc = subset_combo["핸디결과"].value_counts()
            # Fixed row order; outcomes with no rows are dropped.
            hc = hc.reindex(order).dropna().astype(int)
            st.dataframe(hc.rename_axis("핸디결과").to_frame("경기 수"))
else:
    st.info("조건에 맞는 표본이 없습니다.")

# ===============================
# Footer caption
# ===============================
st.caption("ⓒ CatBoost 3-Class Softmax Models | 기본: 59피처, 핸디: 65피처(기본시장 보조 포함)")