# cstore / app.py
# (web-page residue from the hosting site: uploaded by leedami, "Upload 7 files", revision 5841e58 — kept as a comment so the file parses)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ============================================================
# ํŽธ์˜์  ์ˆ˜์š”์˜ˆ์ธก & ๋ฐœ์ฃผ ์ถ”์ฒœ โ€” Pro Suite (ํŒจ์น˜ ๋ฒ„์ „, ๋ฉ€ํ‹ฐ CSV + ์›”๋ณ„ ๊ทธ๋ž˜ํ”„)
# - โ‘  ์—ฌ๋Ÿฌ CSV ์—…๋กœ๋“œ/์„ ํƒ โ†’ ์ž๋™ ๊ฒฐํ•ฉ(์˜ต์…˜: source ์—ด ์ถ”๊ฐ€)
# - โ‘ก ์ปฌ๋Ÿผ ๋งคํ•‘: "์ปฌ๋Ÿผ๋ช…"์ด ์•„๋‹ˆ๋ผ "์˜ˆ์‹œ ๊ฐ’" ๊ธฐ๋ฐ˜ ์„ ํƒ
# - โ‘ข ์˜ˆ์ธกยท๋ฐœ์ฃผ: ์žฌ๊ณ  ์ปฌ๋Ÿผ ์ž๋™ ์ธ์‹ โ†’ ์˜ˆ์ธก ๊ธฐ๊ฐ„/๋ฐœ์ฃผ๋Ÿ‰ ์ž๋™ ๊ณ„์‚ฐ
# ยท ๋ฆฌ๋“œํƒ€์ž„ / ์„œ๋น„์Šค๋ ˆ๋ฒจ / ์•ˆ์ „์žฌ๊ณ  / MOQ / ํŒฉ๋‹จ์œ„ ์ž…๋ ฅ ์ œ๊ฑฐ
# - โ‘ฃ ๋ถ„์„(๊ทธ๋ž˜ํ”„):
# ยท ์šฐ์‚ฐ: ์›”๋ณ„ ๊ฐ•์ˆ˜๋Ÿ‰ โ†” ์šฐ์‚ฐ ํŒ๋งค๋Ÿ‰ (์‚ฐ์ ๋„ + ํšŒ๊ท€์„  + ์ผ๋ณ„ ์„ ํ˜• ๊ทธ๋ž˜ํ”„)
# ยท ๊ตฐ๊ณ ๊ตฌ๋งˆ: ์›”๋ณ„ ๊ธฐ์˜จ โ†” ๊ตฐ๊ณ ๊ตฌ๋งˆ ํŒ๋งค๋Ÿ‰ (์‚ฐ์ ๋„ + ํšŒ๊ท€์„  + ์ผ๋ณ„ ์„ ํ˜• ๊ทธ๋ž˜ํ”„)
# ยท ์ „์ฒด: ์šฐ์‚ฐยท๊ตฐ๊ณ ๊ตฌ๋งˆ ์ œ์™ธ ์ „์ฒด ์ƒํ’ˆ ์ผ๋ณ„ ํŒ๋งค๋Ÿ‰ ์„ ํ˜• ๊ทธ๋ž˜ํ”„
# - ์‚ฌ์ด๋“œ๋ฐ”: ์‹คํ–‰ ํŒŒ์ผ ํ‘œ์‹œ + ์บ์‹œ ์ดˆ๊ธฐํ™”
# ============================================================
import os, io, pickle, time, subprocess, sys
from datetime import timedelta
from pathlib import Path
import pandas as pd
import numpy as np
import streamlit as st
import altair as alt
from utils_io import read_csv_flexible, save_utf8sig, ensure_dirs, auto_map_columns
from preprocess import make_matrix
from train_core import train_and_score, save_artifacts
# Safety valve for large Altair renders (lift the default row-count limit).
alt.data_transformers.disable_max_rows()
# ------------------------------------------------------------
# Page / sidebar
# ------------------------------------------------------------
st.set_page_config(page_title="편의점 수요예측 & 발주 추천 — Pro Suite (패치)", layout="wide")
# Guard for environments such as Colab where __file__ is undefined.
try:
    script_name = Path(__file__).resolve().name
except NameError:
    script_name = "app_streamlit_pro.py"
st.sidebar.write("🧭 실행 파일:", script_name)
if st.sidebar.button("캐시 초기화 후 다시 실행"):
    # Clear both caches; each clear is best-effort so one failure
    # does not prevent the other cache from being cleared.
    try:
        st.cache_data.clear()
    except Exception:
        pass
    try:
        st.cache_resource.clear()
    except Exception:
        pass
    # st.experimental_rerun() was deprecated in Streamlit 1.27 and removed
    # in 1.34; prefer st.rerun() when available, fall back otherwise.
    rerun = getattr(st, "rerun", None)
    if rerun is None:
        rerun = st.experimental_rerun
    rerun()
# ------------------------------------------------------------
# ๊ธฐ๋ณธ ํ™˜๊ฒฝ/๊ฒฝ๋กœ ์„ค์ •
# ------------------------------------------------------------
PROJ = os.getcwd()                          # current working directory (app root)
DATA_DIR = os.path.join(PROJ, "data")       # CSV data folder
ARTI_DIR = os.path.join(PROJ, "artifacts")  # training artifacts (logs/metrics)
MODELS_DIR = os.path.join(PROJ, "models")   # trained model pickles
ensure_dirs(DATA_DIR, ARTI_DIR, MODELS_DIR) # create the folders if missing
# ------------------------------------------------------------
# ์œ ํ‹ธ: data ํด๋”์˜ CSV ํŒŒ์ผ ๋ฆฌ์ŠคํŠธ ์บ์‹œ
# ------------------------------------------------------------
@st.cache_data(show_spinner=False)
def list_data_files():
    """Return the names of all *.csv files in DATA_DIR (result is cached)."""
    try:
        return [f for f in os.listdir(DATA_DIR) if f.lower().endswith(".csv")]
    except FileNotFoundError:
        # data/ may not exist yet on a fresh checkout
        return []
# ------------------------------------------------------------
# ํผ๋ธ”๋ฆญ URL: cloudflared ์‹œ์ž‘ ํ•จ์ˆ˜
# ------------------------------------------------------------
def start_cloudflared(port=8501):
    """Launch a cloudflared quick tunnel pointing at the local Streamlit port.

    Streams at most the first 120 log lines into an expander and highlights
    the line containing the public trycloudflare.com URL.  The process handle
    is stashed in session_state so the diagnostics tab can terminate it.
    """
    try:
        proc = subprocess.Popen(
            ["cloudflared", "tunnel", "--url", f"http://localhost:{port}"],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
        )
        st.session_state["_cfd_proc"] = proc  # handle kept for later termination
        with st.expander("cloudflared logs"):
            for _ in range(120):  # read only the first ~120 log lines
                line = proc.stdout.readline()
                if not line:
                    break
                st.text(line.strip())
                if "trycloudflare.com" in line:
                    st.success(line.strip())  # this log line contains the public URL
                    break
    except FileNotFoundError:
        st.error("cloudflared 바이너리가 없습니다. `pip install cloudflared` 또는 바이너리 설치 후 다시 시도하세요.")
# ------------------------------------------------------------
# ํผ๋ธ”๋ฆญ URL: ngrok ์‹œ์ž‘ ํ•จ์ˆ˜
# ------------------------------------------------------------
def start_ngrok(port=8501, token: str | None = None):
    """Open an ngrok HTTP tunnel to the local Streamlit port.

    Authenticates with *token* (or the NGROK_AUTHTOKEN environment variable),
    kills any pre-existing ngrok session first, and retries the connection
    once before surfacing an error message in the UI.
    """
    try:
        from pyngrok import ngrok, conf
    except Exception:
        st.error("pyngrok가 설치되어 있지 않습니다. `pip install pyngrok` 후 다시 시도하세요.")
        return
    # Tear down any existing ngrok session (avoids conflicts on app re-run).
    try:
        ngrok.kill()
        time.sleep(1.0)
    except Exception:
        pass
    token = (token or os.environ.get("NGROK_AUTHTOKEN", "")).strip()
    if token:
        conf.get_default().auth_token = token
    else:
        st.warning("NGROK_AUTHTOKEN이 비어 있습니다. 인증 없이 열면 제한/에러(4018) 가능.")
    for attempt in range(2):  # one silent retry after a short pause
        try:
            tunnel = ngrok.connect(addr=f"http://localhost:{port}", proto="http")
            url = tunnel.public_url
            st.session_state["_ngrok_tunnel"] = tunnel
            st.success(f"🌐 Public URL: {url}")
            st.caption("런타임/프로세스를 종료하면 터널도 닫힙니다.")
            break
        except Exception as e:
            if attempt == 0:
                time.sleep(1.5)
            else:
                # Map well-known ngrok failure modes to friendlier messages.
                msg = str(e)
                if "4018" in msg:
                    st.error("ngrok 인증 실패(4018). 토큰을 다시 확인하세요.")
                elif "already online" in msg or "334" in msg:
                    st.error("동일 엔드포인트가 이미 열려 있습니다. 세션 재시작 또는 기존 터널 종료 후 재시도.")
                else:
                    st.error(f"ngrok 연결 실패: {e}")
# ------------------------------------------------------------
# ์•ฑ ํƒ€์ดํ‹€/ํƒญ ๊ตฌ์„ฑ
# ------------------------------------------------------------
st.title("ํŽธ์˜์  ์ˆ˜์š”์˜ˆ์ธก & ๋ฐœ์ฃผ ์ถ”์ฒœ โ€” Pro Suite")
tabs = st.tabs(["โ‘  ๋ฐ์ดํ„ฐ", "โ‘ก ํ•™์Šต/๋ชจ๋ธ", "โ‘ข ์˜ˆ์ธกยท๋ฐœ์ฃผ", "โ‘ฃ ๋ถ„์„(๊ทธ๋ž˜ํ”„)", "โ‘ค ์ง„๋‹จ/๋กœ๊ทธ"])
# ============================================================
# โ‘  ๋ฐ์ดํ„ฐ: CSV ์—…๋กœ๋“œ/์„ ํƒ + ์ž๋™ ์ปฌ๋Ÿผ ๋งคํ•‘ ์ €์žฅ (๋ฉ€ํ‹ฐ CSV ์ง€์›)
# ============================================================
with tabs[0]:
    # Tab ①: upload/select CSVs, combine them, and auto-map the columns.
    st.subheader("CSV 업로드 또는 선택")
    cols_top = st.columns([2,1])
    with cols_top[0]:
        add_source = st.checkbox("파일명(source) 열 추가", value=True, help="여러 CSV를 합칠 때 원본 파일명을 남깁니다.")
    with cols_top[1]:
        st.caption("※ 업로드/선택 후 아래에서 컬럼 매핑 저장")
    cols = st.columns(2)
    # --- multi-file upload ---
    with cols[0]:
        up_multi = st.file_uploader("CSV 파일 업로드(여러 개 가능)", type=["csv"], accept_multiple_files=True, key="multi_up")
        if up_multi:
            dfs = []
            for f in up_multi:
                raw = f.read()
                df_i = read_csv_flexible(io.BytesIO(raw))
                if add_source:
                    # keep the originating filename so rows stay traceable after concat
                    df_i["source"] = f.name
                dfs.append(df_i)
                # persist a copy under data/
                save_path = os.path.join(DATA_DIR, f.name)
                try:
                    with open(save_path, "wb") as fp:
                        fp.write(raw)
                except Exception as e:
                    st.warning(f"파일 저장 경고({f.name}): {e}")
                try:
                    list_data_files.clear()  # invalidate the cached file listing
                except Exception:
                    pass
            # sort=True aligns differing column sets by sorting the columns
            df = pd.concat(dfs, axis=0, ignore_index=True, sort=True)
            st.session_state["df"] = df
            st.success(f"업로드/결합 완료: {df.shape} (파일 {len(dfs)}개)")
            st.dataframe(df.head(20), use_container_width=True)
    # --- multi-select from the data folder ---
    with cols[1]:
        files = list_data_files()
        picks = st.multiselect("data 폴더에서 선택(여러 개)", files)
        if st.button("선택 파일 불러오기", disabled=(len(picks)==0)):
            dfs = []
            for name in picks:
                path = os.path.join(DATA_DIR, name)
                df_i = read_csv_flexible(path)
                if add_source:
                    df_i["source"] = name
                dfs.append(df_i)
            df = pd.concat(dfs, axis=0, ignore_index=True, sort=True)
            st.session_state["df"] = df
            st.success(f"불러오기/결합 완료: {df.shape} (파일 {len(dfs)}개)")
            st.dataframe(df.head(20), use_container_width=True)
    # --- automatic column mapping + correction ---
    if "df" in st.session_state:
        st.divider()
        st.caption("자동 컬럼 매핑 — 선택 없이 자동 적용됩니다.")
        df = st.session_state["df"]
        # use the auto_map_columns result directly
        auto = auto_map_columns(df)
        mapping = {
            "date": auto.get("date"),
            "target": auto.get("target"),
            "region": auto.get("region"),
            "brand": auto.get("brand"),
            "item": auto.get("item"),
        }
        st.session_state["mapping"] = mapping
        # ★ Correction for the bundled data files:
        # for seoul_gyeonggi_with_demand.csv / usan.csv / gungoguma.csv,
        # auto_map_columns can pick the rainfall column ('강수량') as the target,
        # so when a daily-sales column ('일일판매량') exists, force it as target.
        # Note: `mapping` is the same dict object stored in session_state above,
        # so this in-place mutation is reflected there as well.
        if mapping.get("target") == "강수량" and "일일판매량" in df.columns:
            mapping["target"] = "일일판매량"
        # read-only table purely for confirmation/display
        mapping_view = pd.DataFrame(
            {
                "역할": ["날짜(date)", "수요/판매량(target)", "지역/점포(region)", "브랜드(선택)", "상품/품목(선택)"],
                "컬럼": [
                    mapping.get("date"),
                    mapping.get("target"),
                    mapping.get("region"),
                    mapping.get("brand"),
                    mapping.get("item"),
                ],
            }
        )
        st.write("현재 자동 매핑 결과:")
        st.dataframe(mapping_view, use_container_width=True)
# ============================================================
# โ‘ก ํ•™์Šต/๋ชจ๋ธ
# ============================================================
with tabs[1]:
    # Tab ②: build the training matrix and fit/score candidate models.
    st.subheader("모델 학습")
    use_optuna = st.checkbox("Optuna 하이퍼파라미터 튜닝 사용", value=False)
    trials = st.slider("Optuna 시도 횟수", 5, 60, 15, 5)
    if "df" not in st.session_state or "mapping" not in st.session_state:
        st.info("먼저 ① 탭에서 데이터와 컬럼 매핑을 지정하세요.")
    else:
        v = st.slider("검증 비율(valid_ratio)", 0.05, 0.4, 0.2, 0.05)
        if st.button("학습 시작"):
            # Defensive: an exception here must not kill the whole app.
            try:
                df, X, y, feat_names = make_matrix(
                    st.session_state["df"],
                    st.session_state["mapping"],
                )
            except Exception as e:
                st.error(f"학습용 데이터 구성 중 오류가 발생했습니다: {e}")
            else:
                try:
                    best_model, lb = train_and_score(
                        X,
                        y,
                        valid_ratio=v,
                        use_optuna=use_optuna,
                        optuna_trials=trials,
                    )
                    save_artifacts(
                        [ARTI_DIR, MODELS_DIR],
                        best_model,
                        feat_names,
                        st.session_state["mapping"],
                        lb,
                    )
                except Exception as e:
                    st.error(f"모델 학습/저장 중 오류가 발생했습니다: {e}")
                else:
                    # Only record results when both training and saving succeeded.
                    st.session_state["leaderboard"] = lb
                    st.session_state["feat_names"] = feat_names
                    st.success("학습 완료")
        if "leaderboard" in st.session_state:
            # Leaderboard persists across reruns via session_state.
            st.dataframe(st.session_state["leaderboard"], use_container_width=True)
# ============================================================
# โ‘ข ์˜ˆ์ธกยท๋ฐœ์ฃผ: ๋ฐ˜๋ณต(AR) ์˜ˆ์ธก + ์žฌ๊ณ  ๊ธฐ๋ฐ˜ ์ž๋™ ๋ฐœ์ฃผ ๊ณ„์‚ฐ
# ============================================================
with tabs[2]:
    # Tab ③: iterative (autoregressive) forecast + inventory-aware order helper.
    st.subheader("예측(반복 AR) & 발주량 추천")
    st.caption("학습된 모델로 미래 피처를 생성하고, 재고를 고려해 자동으로 발주 기간과 수량을 계산합니다.")
    if "df" not in st.session_state or "mapping" not in st.session_state:
        st.info("먼저 ① 탭에서 데이터와 컬럼 매핑을 지정하고 ②에서 학습을 완료하세요.")
    else:
        horizon_days = 14  # fixed forecast horizon (days)
        # Accuracy (manual correction factor applied to the revenue forecast)
        accuracy = st.slider(
            "정확도(예측 보정 계수)",
            min_value=0.5,
            max_value=2.0,
            value=1.0,
            step=0.05,
        )
        # ==============================
        # Segment selection
        # ==============================
        seg_cols = [
            c for c in [
                st.session_state["mapping"].get("region"),
                st.session_state["mapping"].get("brand"),
                st.session_state["mapping"].get("item"),
            ] if c
        ]
        seg_vals = {}
        if seg_cols:
            col_objs = st.columns(len(seg_cols))
            for i, ccol in enumerate(seg_cols):
                opts = ["<전체>"] + sorted(
                    list(map(str, st.session_state["df"][ccol].dropna().astype(str).unique()))
                )
                seg_vals[ccol] = col_objs[i].selectbox(f"{ccol} 선택", opts, index=0)
        # ==============================
        # Iterative forecast function
        # ==============================
        def iterative_forecast(df, mapping, model, feat_names, horizon, seg_vals):
            """Forecast `horizon` days ahead one day at a time.

            Each predicted value is appended to the history so that the
            lag/rolling features of the next day see it (autoregression).
            Returns a DataFrame with the date column and a '예측수량' column;
            empty on validation failure (errors are shown in the UI).
            """
            df = df.copy()
            dtc = mapping["date"]
            tgt = mapping["target"]
            if dtc not in df.columns or tgt not in df.columns:
                st.error(f"예측에 필요한 컬럼이 없습니다. (date='{dtc}', target='{tgt}')")
                return pd.DataFrame(columns=[dtc, "예측수량"])
            df[dtc] = pd.to_datetime(df[dtc], errors="coerce")
            df = df.dropna(subset=[dtc]).sort_values(dtc)
            # Apply the selected segment filters ("<전체>" means no filter).
            for k, v in seg_vals.items():
                if v and v != "<전체>" and k in df.columns:
                    df = df[df[k].astype(str) == str(v)]
            if df.empty:
                st.error("선택한 세그먼트에 해당하는 데이터가 없습니다.")
                return pd.DataFrame(columns=[dtc, "예측수량"])
            if len(df) < 30:
                st.warning("해당 세그먼트 데이터가 적어 예측 품질이 낮을 수 있습니다.")
            last_date = df[dtc].max()
            # Numeric sales history; forecasts get appended to this list below.
            hist = list(
                pd.to_numeric(df[tgt], errors="coerce")
                .fillna(0)
                .astype(float)
                .values
            )
            def build_row_features(current_date, hist_vals):
                """Build one feature vector (calendar + lag/rolling stats)
                ordered to match `feat_names`; unknown features default to 0."""
                if pd.isna(current_date):
                    current_date = df[dtc].max()
                year = current_date.year
                month = current_date.month
                day = current_date.day
                dow = current_date.weekday()
                is_weekend = 1 if dow >= 5 else 0
                try:
                    week = int(pd.Timestamp(current_date).isocalendar().week)
                except Exception:
                    week = 0
                def get_lag(k):
                    # Lag k if enough history, else mean of (up to) last 7 values.
                    if len(hist_vals) >= k:
                        return float(hist_vals[-k])
                    return float(np.mean(hist_vals[-min(len(hist_vals), 7):])) if hist_vals else 0.0
                lag1 = get_lag(1)
                lag7 = get_lag(7)
                lag14 = get_lag(14)
                def rmean(w):
                    # Rolling mean over the last w values, shrinking the window
                    # when the history is shorter than w.
                    arr = np.array(hist_vals[-w:]) if len(hist_vals) >= 1 else np.array([0.0])
                    if len(arr) < max(2, w // 2):
                        arr = np.array(hist_vals[-max(2, w // 2):]) if len(hist_vals) else np.array([0.0])
                    return float(np.mean(arr))
                def rstd(w):
                    # Rolling std over the last w values (0.0 with <2 points).
                    arr = np.array(hist_vals[-w:]) if len(hist_vals) >= 2 else np.array([0.0, 0.0])
                    return float(np.std(arr))
                feats = {
                    "year": year,
                    "month": month,
                    "day": day,
                    "dow": dow,
                    "week": week,
                    "is_weekend": is_weekend,
                    "lag1": lag1,
                    "lag7": lag7,
                    "lag14": lag14,
                    "rmean7": rmean(7),
                    "rmean14": rmean(14),
                    "rstd7": rstd(7),
                    "rstd14": rstd(14),
                }
                # Any feature the model was trained on but we don't derive → 0.0
                for fn in feat_names:
                    if fn not in feats:
                        feats[fn] = 0.0
                x = [feats.get(fn, 0.0) for fn in feat_names]
                return np.array(x, dtype=float)
            preds, dates = [], []
            cur = last_date
            for _ in range(int(horizon)):
                cur = cur + timedelta(days=1)
                x = build_row_features(cur, hist)
                val = float(model.predict([x])[0])
                preds.append(val)
                dates.append(cur)
                hist.append(val)  # feed the prediction back into the history
            return pd.DataFrame({dtc: dates, "예측수량": preds})
        # ==============================
        # Automatic inventory detection
        # ==============================
        def guess_inventory_onhand(df_seg: pd.DataFrame, mapping):
            """Find an inventory-like column by substring match and return
            (column_name, last_numeric_value); (None, None) when absent."""
            candidates = [
                "재고", "재고수", "재고수량",
                "현재재고", "onhand", "on_hand",
                "stock", "inventory",
            ]
            inv_col = None
            for col in df_seg.columns:
                low = col.lower()
                if any(key in low for key in candidates):
                    inv_col = col
                    break
            if not inv_col:
                return None, None
            series = pd.to_numeric(df_seg[inv_col], errors="coerce").dropna()
            if series.empty:
                return None, None
            return inv_col, float(series.iloc[-1])
        # ==============================
        # Automatic price detection
        # ==============================
        def guess_price_column(df_seg):
            """Return the first column whose name contains a price-like keyword,
            or None when no candidate is found."""
            keys = ["price", "가격", "단가", "판매가", "amount", "금액"]
            for col in df_seg.columns:
                low = col.lower()
                if any(k in low for k in keys):
                    return col
            return None
        # ==============================
        # Model loading
        # ==============================
        pkl_path = os.path.join(MODELS_DIR, "best_model.pkl")
        if os.path.exists(pkl_path):
            try:
                # NOTE: pickle.load on an untrusted file is unsafe; this file is
                # produced locally by tab ② (save_artifacts).
                with open(pkl_path, "rb") as f:
                    payload = pickle.load(f)
                model = payload["model"]
                feat_names = payload["feature_names"]
                mapping = payload["mapping"]
            except Exception as e:
                st.error(f"저장된 모델 로딩 중 오류: {e}")
            else:
                dtc = mapping["date"]
                # ======================================
                # 1) Run the forecast
                # ======================================
                fc_df = iterative_forecast(
                    st.session_state["df"],
                    mapping,
                    model,
                    feat_names,
                    horizon_days,
                    seg_vals,
                )
                if fc_df.empty:
                    st.stop()  # errors were already shown inside the function
                # ======================================
                # 2) Price auto-detection + revenue forecast
                # ======================================
                df_seg_price = st.session_state["df"].copy()
                for k, v in seg_vals.items():
                    if v and v != "<전체>" and k in df_seg_price.columns:
                        df_seg_price = df_seg_price[df_seg_price[k].astype(str) == str(v)]
                df_seg_price = df_seg_price.sort_values(dtc)
                price_col = guess_price_column(df_seg_price)
                if price_col:
                    # Use the most recent non-null price in the segment.
                    price_val = float(
                        pd.to_numeric(df_seg_price[price_col], errors="coerce").dropna().iloc[-1]
                    )
                    st.info(f"CSV '{price_col}' 컬럼에서 가격 {price_val:,.0f}원 자동 인식.")
                else:
                    price_val = st.number_input(
                        "가격(원) – CSV에서 가격 컬럼을 찾지 못해 직접 입력",
                        min_value=0,
                        max_value=100000000,
                        value=0,
                    )
                # Total forecast quantity
                total_qty_demand = float(fc_df["예측수량"].sum())
                # Total forecast revenue (accuracy factor applied here only)
                fc_df["금액예측"] = (fc_df["예측수량"] * price_val * float(accuracy)).clip(lower=0.0)
                total_amt_demand = float(fc_df["금액예측"].sum())
                # ======================================
                # 3) Inventory auto-detection
                # ======================================
                df_seg = st.session_state["df"].copy()
                df_seg[dtc] = pd.to_datetime(df_seg[dtc], errors="coerce")
                for k, v in seg_vals.items():
                    if v and v != "<전체>" and k in df_seg.columns:
                        df_seg = df_seg[df_seg[k].astype(str) == str(v)]
                df_seg = df_seg.sort_values(dtc)
                inv_col, onhand_auto = guess_inventory_onhand(df_seg, mapping)
                if onhand_auto is None:
                    onhand = st.number_input(
                        "현재 재고(직접 입력)",
                        min_value=0,
                        max_value=100000,
                        value=0,
                    )
                else:
                    onhand = onhand_auto
                    st.info(f"재고 '{inv_col}' 자동 인식 → {onhand:,.0f}개")
                # ======================================
                # 4) Order-quantity / stock-out-day calculations (quantity basis)
                # ======================================
                avg_daily_qty = total_qty_demand / horizon_days if horizon_days > 0 else 0.0
                days_to_out = (onhand / avg_daily_qty) if avg_daily_qty > 0 else float("inf")
                # NOTE(review): rec_qty (recommended order quantity) is computed
                # but never displayed below — confirm whether it should be shown.
                rec_qty = max(0.0, total_qty_demand - onhand)
                c1, c2, c3 = st.columns(3)
                c1.metric("예측 기간(일)", f"{horizon_days}")
                c2.metric("재고 소진 예상일수", "∞" if np.isinf(days_to_out) else f"{days_to_out:,.1f}")
                c3.metric("2주 총 예상 매출", f"{total_amt_demand:,.0f}원")
                # ======================================
                # 5) Table output
                # ======================================
                st.dataframe(fc_df.set_index(dtc), use_container_width=True)
                st.caption("※ 예측수량 × 가격 × 정확도 보정 = 금액예측")
        else:
            st.warning("best_model.pkl 이 없습니다. ② 탭에서 학습을 먼저 수행하세요.")
# ============================================================
# โ‘ฃ ๋ถ„์„(๊ทธ๋ž˜ํ”„):
# - ์šฐ์‚ฐ: ํ•œ ๋‹ฌ ๊ฐ•์ˆ˜๋Ÿ‰ vs ์šฐ์‚ฐ ํŒ๋งค๋Ÿ‰ (์‚ฐ์ ๋„ + ํšŒ๊ท€์„  + ์ผ๋ณ„ ์„ ํ˜• ๊ทธ๋ž˜ํ”„)
# - ๊ตฐ๊ณ ๊ตฌ๋งˆ: ํ•œ ๋‹ฌ ๊ธฐ์˜จ vs ๊ตฐ๊ณ ๊ตฌ๋งˆ ํŒ๋งค๋Ÿ‰ (์‚ฐ์ ๋„ + ํšŒ๊ท€์„  + ์ผ๋ณ„ ์„ ํ˜• ๊ทธ๋ž˜ํ”„)
# - ์ „์ฒด: ์šฐ์‚ฐยท๊ตฐ๊ณ ๊ตฌ๋งˆ ์ œ์™ธ ์ผ๋ณ„ ํŒ๋งค๋Ÿ‰ ์„ ํ˜• ๊ทธ๋ž˜ํ”„
# ============================================================
with tabs[3]:
    # Tab ④: month-by-month correlation analysis (umbrella vs rainfall,
    # roasted sweet potato vs temperature, and everything-else daily sales).
    st.subheader("분석(그래프) — 한 달 단위 상관 분석")
    if "df" not in st.session_state or "mapping" not in st.session_state or not st.session_state["mapping"].get("date"):
        st.info("먼저 ① 탭에서 데이터와 컬럼 매핑(특히 '날짜'와 '타깃')을 지정하세요.")
    else:
        mapping = st.session_state["mapping"]
        date_col = mapping["date"]
        target_col = mapping.get("target")
        def guess(colnames, cands):
            """Return the first column whose lowercase name contains any of the
            candidate keywords (checked in candidate order), else None."""
            low = [str(c).lower() for c in colnames]
            for key in cands:
                key_low = str(key).lower()
                for i, l in enumerate(low):
                    if key_low in l:
                        return colnames[i]
            return None
        # Shared helper: build the list of selectable year-months.
        def build_year_month_options(df, date_col):
            """Parse the date column, add a 'year_month' Period column, and
            return (df, [(label, Period), ...]) sorted ascending."""
            df = df.copy()
            df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
            df = df.dropna(subset=[date_col])
            if df.empty:
                return df, []
            df["year_month"] = df[date_col].dt.to_period("M")
            ym_unique = sorted(df["year_month"].unique())
            ym_labels = [str(p) for p in ym_unique]  # e.g. '2024-10'
            return df, list(zip(ym_labels, ym_unique))
        tab_u, tab_g, tab_all = st.tabs([
            "☔ 우산: 한 달 강수량 vs 판매량",
            "🏠 군고구마: 한 달 기온 vs 판매량",
            "📈 전체: 우산·군고구마 제외 일별 판매량(선형)"
        ])
        # ------------------------------
        # 1) Umbrella: rainfall vs umbrella sales for the chosen month
        # ------------------------------
        with tab_u:
            st.caption("우산 판매량과 강수량의 관계를 '한 달' 단위로 봅니다.")
            up_u = st.file_uploader("우산/날씨 데이터 CSV (선택)", type=["csv"], key="umbrella_month_up")
            if up_u is not None:
                df_u_raw = read_csv_flexible(io.BytesIO(up_u.read()))
            else:
                df_u_raw = st.session_state["df"].copy()
            if date_col not in df_u_raw.columns:
                st.warning(f"날짜 컬럼 '{date_col}' 을(를) 데이터에서 찾지 못했습니다.")
            else:
                # Filter to umbrella rows via the item column (when present).
                item_col = mapping.get("item")
                if item_col and item_col in df_u_raw.columns:
                    mask = df_u_raw[item_col].astype(str).str.contains("우산|umbrella", case=False, na=False)
                    if mask.any():
                        df_u_raw = df_u_raw[mask]
                cols_all = list(df_u_raw.columns)
                # Sales column: prefer the mapped target, otherwise guess by name.
                sales_col = target_col if target_col in cols_all else guess(
                    cols_all,
                    ["umbrella", "우산", "일일판매량", "판매량", "sales", "qty", "quantity", "target"],
                )
                # Rainfall column guess
                rain_col = guess(
                    cols_all,
                    ["rain", "precip", "precipitation", "강수", "강수량", "일강수량", "강우", "강우량"],
                )
                if not sales_col or not rain_col:
                    st.warning(
                        "우산 판매량 또는 강수량 컬럼을 자동으로 찾지 못했습니다.\n"
                        "판매량: '우산/umbrella/판매량/sales', 강수량: '강수량/rain' 등의 이름을 사용해 주세요."
                    )
                else:
                    # Normalize date/numeric formats + build year-month options.
                    df_u_raw[sales_col] = pd.to_numeric(df_u_raw[sales_col], errors="coerce")
                    df_u_raw[rain_col] = pd.to_numeric(df_u_raw[rain_col], errors="coerce")
                    df_u_raw, ym_options = build_year_month_options(df_u_raw, date_col)
                    if not ym_options:
                        st.info("유효한 날짜 데이터가 없습니다.")
                    else:
                        # Year-month picker (shows only YYYY-MM labels).
                        labels = [lab for lab, _ in ym_options]
                        default_idx = len(labels) - 1  # default: most recent month
                        sel_label = st.selectbox("분석할 연월(YYYY-MM)", labels, index=default_idx, key="ym_umbrella")
                        sel_period = dict(ym_options)[sel_label]
                        # Filter down to the selected month only.
                        df_month = df_u_raw[df_u_raw["year_month"] == sel_period].copy()
                        if df_month.empty:
                            st.info(f"{sel_label} 에 해당하는 데이터가 없습니다.")
                        else:
                            # Daily aggregation: sum sales, mean rainfall.
                            df_month["date_only"] = df_month[date_col].dt.date
                            daily = (
                                df_month.groupby("date_only", as_index=False)
                                .agg({sales_col: "sum", rain_col: "mean"})
                                .dropna(subset=[sales_col, rain_col])
                            )
                            daily = daily.rename(
                                columns={"date_only": "date", sales_col: "sales", rain_col: "rain"}
                            )
                            if daily.empty:
                                st.info("해당 연월에서 일별로 집계할 수 있는 데이터가 없습니다.")
                            else:
                                st.markdown(f"**{sel_label} 한 달 기준 · 강수량에 따른 우산 판매량**")
                                base = alt.Chart(daily).encode(
                                    x=alt.X("rain:Q", title="일 강수량"),
                                    y=alt.Y("sales:Q", title="일 우산 판매량"),
                                )
                                # Red scatter + linear regression line
                                points = base.mark_circle(size=70, color="#d62728").encode(
                                    tooltip=[
                                        alt.Tooltip("date:T", title="날짜"),
                                        alt.Tooltip("rain:Q", title="강수량"),
                                        alt.Tooltip("sales:Q", title="우산 판매량"),
                                    ]
                                )
                                reg_line = base.transform_regression("rain", "sales").mark_line(color="#b22222")
                                st.altair_chart((points + reg_line).interactive(), use_container_width=True)
                                # ★ Extra: daily umbrella-sales line chart
                                st.markdown("**일별 우산 판매량 추세(선형 그래프)**")
                                line_umbrella = (
                                    alt.Chart(daily)
                                    .mark_line()
                                    .encode(
                                        x=alt.X("date:T", title="날짜"),
                                        y=alt.Y("sales:Q", title="일 우산 판매량"),
                                        tooltip=[
                                            alt.Tooltip("date:T", title="날짜"),
                                            alt.Tooltip("sales:Q", title="우산 판매량"),
                                            alt.Tooltip("rain:Q", title="강수량"),
                                        ],
                                    )
                                )
                                st.altair_chart(line_umbrella.interactive(), use_container_width=True)
                                # Reference table
                                st.dataframe(daily, use_container_width=True)
        # ------------------------------
        # 2) Roasted sweet potato: temperature vs sales for the chosen month
        # ------------------------------
        with tab_g:
            st.caption("군고구마 판매량과 기온(추위)의 관계를 '한 달' 단위로 봅니다.")
            up_g = st.file_uploader("군고구마/날씨 데이터 CSV (선택)", type=["csv"], key="goguma_month_up")
            if up_g is not None:
                df_g_raw = read_csv_flexible(io.BytesIO(up_g.read()))
            else:
                df_g_raw = st.session_state["df"].copy()
            if date_col not in df_g_raw.columns:
                st.warning(f"날짜 컬럼 '{date_col}' 을(를) 데이터에서 찾지 못했습니다.")
            else:
                # Filter to sweet-potato rows via the item column (when present).
                item_col_g = mapping.get("item")
                if item_col_g and item_col_g in df_g_raw.columns:
                    mask_g = df_g_raw[item_col_g].astype(str).str.contains(
                        "고구마|군고구마|sweet|goguma", case=False, na=False
                    )
                    if mask_g.any():
                        df_g_raw = df_g_raw[mask_g]
                cols_all_g = list(df_g_raw.columns)
                goguma_col = target_col if target_col in cols_all_g else guess(
                    cols_all_g,
                    ["고구마", "군고구마", "sweetpotato", "goguma", "판매량", "sales", "qty", "quantity", "target"],
                )
                temp_col = guess(
                    cols_all_g,
                    ["온도", "tmin", "temp_min", "min_temp", "최저", "최저기온", "일최저기온", "temperature", "temp"],
                )
                if not goguma_col or not temp_col:
                    st.warning(
                        "군고구마 판매량 또는 기온 컬럼을 자동으로 찾지 못했습니다.\n"
                        "판매량: '군고구마/고구마/sales/target', 기온: 'tmin/최저기온/temperature' 등의 이름을 사용해 주세요."
                    )
                else:
                    df_g_raw[goguma_col] = pd.to_numeric(df_g_raw[goguma_col], errors="coerce")
                    df_g_raw[temp_col] = pd.to_numeric(df_g_raw[temp_col], errors="coerce")
                    df_g_raw, ym_options_g = build_year_month_options(df_g_raw, date_col)
                    if not ym_options_g:
                        st.info("유효한 날짜 데이터가 없습니다.")
                    else:
                        labels_g = [lab for lab, _ in ym_options_g]
                        default_idx_g = len(labels_g) - 1
                        sel_label_g = st.selectbox("분석할 연월(YYYY-MM)", labels_g, index=default_idx_g, key="ym_goguma")
                        sel_period_g = dict(ym_options_g)[sel_label_g]
                        df_month_g = df_g_raw[df_g_raw["year_month"] == sel_period_g].copy()
                        if df_month_g.empty:
                            st.info(f"{sel_label_g} 에 해당하는 데이터가 없습니다.")
                        else:
                            # Daily aggregation: sum sales, mean temperature.
                            df_month_g["date_only"] = df_month_g[date_col].dt.date
                            daily_g = (
                                df_month_g.groupby("date_only", as_index=False)
                                .agg({goguma_col: "sum", temp_col: "mean"})
                                .dropna(subset=[goguma_col, temp_col])
                            )
                            daily_g = daily_g.rename(
                                columns={"date_only": "date", goguma_col: "sales", temp_col: "temp"}
                            )
                            if daily_g.empty:
                                st.info("해당 연월에서 일별로 집계할 수 있는 데이터가 없습니다.")
                            else:
                                st.markdown(f"**{sel_label_g} 한 달 기준 · 기온에 따른 군고구마 판매량**")
                                base_g = alt.Chart(daily_g).encode(
                                    x=alt.X("temp:Q", title="일 평균 기온"),
                                    y=alt.Y("sales:Q", title="일 군고구마 판매량"),
                                )
                                points_g = base_g.mark_circle(size=70, color="#ff7f0e").encode(
                                    tooltip=[
                                        alt.Tooltip("date:T", title="날짜"),
                                        alt.Tooltip("temp:Q", title="기온"),
                                        alt.Tooltip("sales:Q", title="군고구마 판매량"),
                                    ]
                                )
                                reg_g = base_g.transform_regression("temp", "sales").mark_line(color="#d35400")
                                st.altair_chart((points_g + reg_g).interactive(), use_container_width=True)
                                # ★ Extra: daily sweet-potato-sales line chart
                                st.markdown("**일별 군고구마 판매량 추세(선형 그래프)**")
                                line_goguma = (
                                    alt.Chart(daily_g)
                                    .mark_line()
                                    .encode(
                                        x=alt.X("date:T", title="날짜"),
                                        y=alt.Y("sales:Q", title="일 군고구마 판매량"),
                                        tooltip=[
                                            alt.Tooltip("date:T", title="날짜"),
                                            alt.Tooltip("temp:Q", title="기온"),
                                            alt.Tooltip("sales:Q", title="군고구마 판매량"),
                                        ],
                                    )
                                )
                                st.altair_chart(line_goguma.interactive(), use_container_width=True)
                                st.dataframe(daily_g, use_container_width=True)
        # ------------------------------
        # 3) Everything else: daily sales excluding umbrella/sweet potato
        # ------------------------------
        with tab_all:
            st.caption("우산·군고구마를 제외한 모든 상품의 일별 판매량 추세를 한 번에 봅니다.")
            df_all = st.session_state["df"].copy()
            if date_col not in df_all.columns or not target_col or target_col not in df_all.columns:
                st.warning(f"날짜('{date_col}') 또는 타깃('{target_col}') 컬럼을 찾을 수 없습니다.")
            else:
                # Exclude umbrella/sweet-potato items when an item column exists.
                item_col_all = mapping.get("item")
                if item_col_all and item_col_all in df_all.columns:
                    ex_mask = df_all[item_col_all].astype(str).str.contains(
                        "우산|umbrella|고구마|군고구마|sweet|goguma", case=False, na=False
                    )
                    df_all = df_all[~ex_mask]
                df_all[target_col] = pd.to_numeric(df_all[target_col], errors="coerce")
                df_all, ym_options_all = build_year_month_options(df_all, date_col)
                if not ym_options_all:
                    st.info("유효한 날짜 데이터가 없습니다.")
                else:
                    labels_all = [lab for lab, _ in ym_options_all]
                    default_idx_all = len(labels_all) - 1
                    sel_label_all = st.selectbox(
                        "분석할 연월(YYYY-MM)",
                        labels_all,
                        index=default_idx_all,
                        key="ym_all",
                    )
                    sel_period_all = dict(ym_options_all)[sel_label_all]
                    df_month_all = df_all[df_all["year_month"] == sel_period_all].copy()
                    if df_month_all.empty:
                        st.info(f"{sel_label_all} 에 해당하는 데이터가 없습니다.")
                    else:
                        # Daily aggregation: total sales across all remaining items.
                        df_month_all["date_only"] = df_month_all[date_col].dt.date
                        daily_all = (
                            df_month_all.groupby("date_only", as_index=False)
                            .agg({target_col: "sum"})
                            .dropna(subset=[target_col])
                        )
                        daily_all = daily_all.rename(
                            columns={"date_only": "date", target_col: "sales"}
                        )
                        if daily_all.empty:
                            st.info("해당 연월에서 일별로 집계할 수 있는 데이터가 없습니다.")
                        else:
                            st.markdown(f"**{sel_label_all} 한 달 기준 · 우산·군고구마 제외 전체 상품 일별 판매량(선형)**")
                            line_all = (
                                alt.Chart(daily_all)
                                .mark_line()
                                .encode(
                                    x=alt.X("date:T", title="날짜"),
                                    y=alt.Y("sales:Q", title="일 판매량(전체 상품 합계)"),
                                    tooltip=[
                                        alt.Tooltip("date:T", title="날짜"),
                                        alt.Tooltip("sales:Q", title="일 판매량 합계"),
                                    ],
                                )
                            )
                            st.altair_chart(line_all.interactive(), use_container_width=True)
                            st.dataframe(daily_all, use_container_width=True)
# ============================================================
# โ‘ค ์ง„๋‹จ/๋กœ๊ทธ: ๊ฒฝ๋กœ/ํŒŒ์ผ ํ™•์ธ + ํผ๋ธ”๋ฆญ URL ์—ด๊ธฐ/๋‹ซ๊ธฐ
# ============================================================
with tabs[4]:
    # Tab ⑤: show the data/artifacts/models folder contents and open/close
    # a public URL tunnel (ngrok or cloudflared).
    st.subheader("경로/파일 상태")
    cols = st.columns(2)
    with cols[0]:
        st.write("**data**", DATA_DIR)
        st.write(os.listdir(DATA_DIR) if os.path.exists(DATA_DIR) else [])
        st.write("**artifacts**", ARTI_DIR)
        st.write(os.listdir(ARTI_DIR) if os.path.exists(ARTI_DIR) else [])
    with cols[1]:
        st.write("**models**", MODELS_DIR)
        st.write(os.listdir(MODELS_DIR) if os.path.exists(MODELS_DIR) else [])
    st.caption("필요 시 퍼블릭 URL을 열어 외부에서 접속할 수 있습니다.")
    mode = st.radio("퍼블릭 URL 터널러", ["ngrok", "cloudflared"], horizontal=True, index=0)
    ngk = None
    if mode == "ngrok":
        ngk = st.text_input(
            "NGROK_AUTHTOKEN",
            value=os.environ.get("NGROK_AUTHTOKEN", ""),
            type="password",
            help="환경변수에 넣어두면 다음부터 자동 인식합니다.",
        )
    c_open, c_close = st.columns(2)
    if c_open.button("퍼블릭 URL 열기", use_container_width=True):
        if mode == "ngrok":
            if ngk:
                # Persist the token for the current process so start_ngrok
                # (and future reruns) can pick it up from the environment.
                os.environ["NGROK_AUTHTOKEN"] = ngk
            start_ngrok()
        else:
            start_cloudflared()
    if c_close.button("퍼블릭 URL 닫기", use_container_width=True):
        if mode == "ngrok":
            try:
                from pyngrok import ngrok
                ngrok.kill()
                st.info("ngrok 터널을 종료했습니다.")
            except Exception as e:
                st.warning(f"ngrok 종료 중 경고: {e}")
        else:
            # Terminate the cloudflared process saved by start_cloudflared.
            proc = st.session_state.get("_cfd_proc")
            if proc:
                proc.terminate()
                st.info("cloudflared 터널을 종료했습니다.")
            else:
                st.info("cloudflared 활성 프로세스가 없습니다.")