academy / app.py
soojeongcrystal's picture
Remove pyarrow dependency and usage to reduce build size and fix OOM
b5aacf9
import streamlit as st
import pandas as pd
import numpy as np
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
import re
import io
import time
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# CSS ์Šคํƒ€์ผ ์ •์˜
def local_css():
    """Inject the app-wide custom CSS into the page.

    The stylesheet is sent through ``st.markdown`` with
    ``unsafe_allow_html=True``.  Annotations inside the stylesheet use CSS
    block comments: a ``#``-style note is not a comment in CSS and makes the
    browser discard the declaration or rule that follows it.
    """
    st.markdown(
        """
        <style>
        .main .block-container {
            max-width: 1200px; /* ์ตœ๋Œ€ ๋„ˆ๋น„ ์„ค์ • */
            padding: 1rem 2rem; /* ์ขŒ์šฐ ์—ฌ๋ฐฑ ์ถ”๊ฐ€ */
            margin: 0 auto; /* ์ค‘์•™ ์ •๋ ฌ */
        }
        /* ๊ธฐ์กด CSS ์Šคํƒ€์ผ ์œ ์ง€ */
        .sidebar-option {
            padding: 10px 15px;
            margin: 5px 0;
            border-radius: 5px;
            cursor: pointer;
            transition: background-color 0.3s;
        }
        .sidebar-option:hover {
            background-color: #ff4b4b20;
        }
        .sidebar-option.selected {
            background-color: #ff4b4b;
            color: white;
        }
        div[data-testid="stSidebar"] ul {
            padding-left: 0;
        }
        .stProgress .st-bo {
            background-color: #ff4b4b;
        }
        button[kind="secondary"] {
            background-color: transparent;
            color: #ff4b4b;
            border: 1px solid #ff4b4b;
        }
        button[kind="secondary"]:hover {
            border-color: #ff2b2b;
            color: #ff2b2b;
        }
        div.stButton > button:first-child {
            width: 100%;
            margin-bottom: 10px;
        }
        .custom-subheader {
            font-size: 1.2rem;
            color: #0068c9;
            margin-bottom: 1rem;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
# ์‚ฌ์ด๋“œ๋ฐ” ๋ฉ”๋‰ด ์ƒ์„ฑ
def sidebar_menu():
    """Draw the sidebar navigation buttons and return the active page label.

    The active label lives in ``st.session_state.current_menu``; it starts as
    the first entry and is replaced (followed by a rerun) when a button is
    pressed.
    """
    st.sidebar.title("๋ฉ”๋‰ด")
    menu_options = ["1) ๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ตํ•ฉ", "2) ๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ต๊ณ„", "3) PPT ์ฃผ์š”์‘๋‹ต ์ถ”์ถœ"]

    # First visit: land on the first page.
    if "current_menu" not in st.session_state:
        st.session_state.current_menu = menu_options[0]

    for label in menu_options:
        # The active page is drawn as a primary button, the others as secondary.
        is_active = st.session_state.current_menu == label
        kind = "primary" if is_active else "secondary"
        clicked = st.sidebar.button(
            label,
            key=f"btn_{label}",
            help=f"{label} ํŽ˜์ด์ง€๋กœ ์ด๋™",
            use_container_width=True,
            type=kind,
        )
        if clicked:
            st.session_state.current_menu = label
            st.rerun()

    return st.session_state.current_menu
# ํŒŒ์ผ ์ฝ๊ธฐ ํ•จ์ˆ˜
def read_uploaded_file(uploaded_file, show_success=False):
    """Parse an uploaded CSV/Excel file into a DataFrame.

    Args:
        uploaded_file: File-like upload exposing ``name`` and ``size``
            (``None`` is accepted and yields ``None``).
        show_success: When true, report a success message after loading.

    Returns:
        The parsed ``DataFrame``, or ``None`` when nothing was uploaded, the
        file exceeds 1 GB, or parsing failed (an error is shown in that case).
    """
    try:
        if uploaded_file is None:
            return None

        max_file_size = 1024 * 1024 * 1024  # 1 GB in bytes
        if uploaded_file.size > max_file_size:
            st.error("ํŒŒ์ผ ํฌ๊ธฐ๊ฐ€ ๋„ˆ๋ฌด ํฝ๋‹ˆ๋‹ค. ์ตœ๋Œ€ 1GB๊นŒ์ง€ ํ—ˆ์šฉ๋ฉ๋‹ˆ๋‹ค.")
            return None

        file_type = uploaded_file.name.split(".")[-1].lower()

        # The same upload object is parsed more than once per script run;
        # rewind it so a second read does not start at end-of-stream.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)

        if file_type == "csv":
            try:
                df = pd.read_csv(uploaded_file, low_memory=True)
            except Exception as e:
                st.error(f"CSV ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}")
                return None
        else:
            # openpyxl cannot open legacy .xls workbooks, so let pandas pick
            # the engine for those; every other extension keeps openpyxl.
            excel_engine = None if file_type == "xls" else "openpyxl"
            try:
                df = pd.read_excel(uploaded_file, engine=excel_engine)
            except Exception as e:
                st.error(f"Excel ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}")
                return None

        if show_success:
            st.success(f"ํŒŒ์ผ '{uploaded_file.name}' ๋กœ๋“œ ์™„๋ฃŒ")
        return df
    except Exception as e:
        st.error(f"ํŒŒ์ผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        return None
# ์ค‘๋ณต ํ‚ค ํ™•์ธ ํ•จ์ˆ˜
def check_duplicates(df, key_columns, context=""):
    """Report rows whose key columns occur more than once.

    Returns ``True`` (after showing a warning and the offending key values)
    when any duplicated key exists, otherwise ``False``.
    """
    repeated_mask = df.duplicated(subset=key_columns, keep=False)
    repeated_rows = df[repeated_mask]
    if repeated_rows.empty:
        return False

    st.warning(f"{context}์—์„œ ์ค‘๋ณต ํ‚ค๊ฐ€ ๋ฐœ๊ฒฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค:")
    st.dataframe(repeated_rows[key_columns])
    return True
# ์—‘์…€ ๋‹ค์šด๋กœ๋“œ ๋ฐ์ดํ„ฐ ์ƒ์„ฑ
def create_excel_download(df):
    """Serialise ``df`` (without its index) to in-memory .xlsx bytes."""
    buffer = io.BytesIO()
    writer = pd.ExcelWriter(buffer, engine="openpyxl")
    # Leaving the ``with`` block finalises the workbook into ``buffer``.
    with writer:
        df.to_excel(writer, index=False)
    return buffer.getvalue()
def clean_column_names(df):
    """Map every column of ``df`` to its name without a leading ``"<digits>."`` prefix.

    The result is a dict ``{original column: cleaned name}``; cleaned names are
    strings with surrounding whitespace removed.
    """
    return {
        original: re.sub(r"^\d+\.\s*", "", str(original)).strip()
        for original in df.columns
    }
# ํ˜„์žฌ ๋ณ€์ˆ˜ ์ˆœ์„œ ํ‘œ์‹œ ํ•จ์ˆ˜๋ฅผ ์ˆ˜์ •
def display_current_order():
    """Show the current variable order as two side-by-side numbered lists.

    Uses ``temp_column_order`` when it exists in session state, otherwise
    ``column_order``.  The group named first in ``current_group_order`` is
    rendered in the left column.
    """
    st.write("### ํ˜„์žฌ ๋ณ€์ˆ˜ ๊ตฌ์„ฑ")

    state = st.session_state
    if "temp_column_order" in state:
        ordering = state.temp_column_order
    else:
        ordering = state.column_order

    personal_first = state.current_group_order[0] == "๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜"

    personal_panel = (
        "๐Ÿ“‹ ๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜",
        [name for name in ordering if name in state.personal_info_cols],
    )
    survey_panel = (
        "๐Ÿ“Š ์„œ๋ฒ ์ด ์‘๋‹ต ๋ณ€์ˆ˜",
        [name for name in ordering if name not in state.personal_info_cols],
    )
    if personal_first:
        panels = [personal_panel, survey_panel]
    else:
        panels = [survey_panel, personal_panel]

    for container, (title, names) in zip(st.columns(2), panels):
        with container:
            st.write(f"##### {title}")
            for position, name in enumerate(names, 1):
                st.write(f"{position}. {name}")
            st.info(f"์ด {len(names)}๊ฐœ ๋ณ€์ˆ˜")
# ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ ํ‘œ์‹œ ํ•จ์ˆ˜
def display_data_preview():
    """Show the first rows of the combined survey in the current column order.

    Order precedence: ``temp_column_order`` if present, else a non-empty
    ``final_column_order``, else ``column_order``.  Nothing but the heading is
    rendered while ``combined_survey`` is ``None``.
    """
    st.write("### ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
    if st.session_state.combined_survey is None:
        return

    if "temp_column_order" in st.session_state:
        current_columns = st.session_state.temp_column_order
    elif st.session_state.get("final_column_order"):
        # An empty list means "nothing confirmed yet"; treating it as a real
        # order would preview a frame with zero columns.
        current_columns = st.session_state.final_column_order
    else:
        current_columns = st.session_state.column_order

    st.dataframe(st.session_state.combined_survey[current_columns].head())
# ๋งค์นญ ํ‚ค ๋ฐ์ดํ„ฐ ์ •๋ฆฌ ํ•จ์ˆ˜ ์ˆ˜์ •
def clean_matching_key(df, key_columns):
    """Return a copy of ``df`` whose key columns are normalised for matching.

    Each key column has missing values replaced by ``""`` and is then turned
    into text, trimmed, stripped of every character that is neither a word
    character nor whitespace, and upper-cased.  Returns ``None`` (after
    showing an error) when a key column is absent or processing fails.
    """
    try:
        absent = [name for name in key_columns if name not in df.columns]
        if absent:
            st.error(f"๋‹ค์Œ ์ปฌ๋Ÿผ์ด ๋ฐ์ดํ„ฐ์— ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค: {', '.join(absent)}")
            return None

        normalised = df.copy()
        for name in key_columns:
            normalised[name] = (
                normalised[name]
                .fillna("")
                .astype(str)
                .str.strip()
                .str.replace(r"[^\w\s]", "", regex=True)
                .str.upper()
            )
        return normalised
    except Exception as e:
        st.error(f"ํ‚ค ๋ฐ์ดํ„ฐ ์ •๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        return None
# ๋งค์นญ ํ‚ค ์„ฑ ํ•จ์ˆ˜ ์ˆ˜์ •
def create_match_key(df, key_columns):
    """Build one ``"_"``-joined text key per row from ``key_columns``.

    Missing values are treated as empty strings (a warning is shown when any
    exist).  Returns ``None`` after showing an error if key creation fails.
    """
    try:
        has_missing = df[key_columns].isnull().any().any()
        if has_missing:
            st.warning("๋งค์นญ ํ‚ค ์ปฌ๋Ÿผ์— ๋ˆ„๋ฝ ๊ฐ’์ด ์žˆ์Šต๋‹ˆ๋‹ค.")

        key_text = df[key_columns].fillna("").astype(str)

        def join_row(row):
            return "_".join(row)

        return key_text.apply(join_row, axis=1)
    except Exception as e:
        st.error(f"๋งค์นญ ํ‚ค ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        return None
# Labels shared by several widgets below; they are compared against widget
# return values, so every use must be the exact same string.
_SURVEY_GROUP_PERSONAL = "๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜"
_SURVEY_GROUP_RESPONSES = "์„œ๋ฒ ์ด ์‘๋‹ต ๋ณ€์ˆ˜"
_SURVEY_MOVE_FIRST = "๋งจ ์•ž์œผ๋กœ ์ด๋™ โฌ†๏ธ"
_SURVEY_MOVE_LAST = "๋งจ ๋’ค๋กœ ์ด๋™ โฌ‡๏ธ"
_SURVEY_MOVE_RELATIVE = "ํŠน์ • ๋ณ€์ˆ˜ ๊ธฐ์ค€ ์ด๋™ โ†•๏ธ"
_SURVEY_POSITION_BEFORE = "์„ ํƒํ•œ ๋ณ€์ˆ˜ ์•ž์œผ๋กœโฌ†๏ธ"
_SURVEY_POSITION_AFTER = "์„ ํƒํ•œ ๋ณ€์ˆ˜ ๋’ค๋กœโฌ‡๏ธ"


def _survey_init_state():
    """Create the session-state entries this page relies on, if missing."""
    defaults = {
        "survey_files_processed": False,
        "combined_survey": None,
        "personal_info_cols": [],
        "first_df_columns": None,
        "column_order": [],
        "final_column_order": [],
        "show_target_section": False,
    }
    for name, value in defaults.items():
        if name not in st.session_state:
            st.session_state[name] = value


def _survey_column_label(name):
    """Prefix a column name with the icon of the group it belongs to."""
    if name in st.session_state.personal_info_cols:
        return f"๐Ÿ“‹ {name}"
    return f"๐Ÿ“Š {name}"


def _survey_clean_key_value(value):
    """Normalise one key cell: text form, no trailing '.0', no spaces, upper-case."""
    value = str(value)
    if value.endswith(".0"):
        value = value[:-2]
    value = value.strip().replace(" ", "")
    return value.upper()


def _survey_current_order():
    """Return the column order currently presented to the user.

    A pending ``temp_column_order`` wins as long as it names exactly the
    columns of the combined frame; otherwise ``column_order`` is used.
    """
    pending = st.session_state.get("temp_column_order")
    combined = st.session_state.combined_survey
    if pending and combined is not None:
        columns = list(combined.columns)
        if len(pending) == len(columns) and set(pending) == set(columns):
            return list(pending)
    return list(st.session_state.column_order)


def _survey_select_personal_columns(first_df):
    """Render the personal-info column picker and store the confirmed choice."""
    st.write("### ๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ ์„ ํƒ")
    if "temp_selected_cols" not in st.session_state:
        st.session_state.temp_selected_cols = []

    options = first_df.columns.tolist()
    col1, col2 = st.columns([3, 1])
    with col1:
        st.info("์งˆ๋ฌธ์„ ์ œ์™ธํ•˜๊ณ  '๋ฐ˜๋ณต๋˜๋Š” ๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ'์„ ๋ชจ๋‘ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")
        st.session_state.temp_selected_cols = st.multiselect(
            "๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ ์„ ํƒ",
            options=options,
            # Defaults must be a subset of the options, which change when a
            # different first file is uploaded.
            default=[
                col for col in st.session_state.personal_info_cols if col in options
            ],
            help="Ctrl(Cmd)ํ‚ค๋ฅผ ๋ˆ„๋ฅธ ์ฑ„๋กœ ํด๋ฆญํ•˜์—ฌ ์—ฌ๋Ÿฌ ์ปฌ๋Ÿผ์„ ํ•œ ๋ฒˆ์— ์„ ํƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.",
        )
    with col2:
        st.write("")
        st.write("")
        if st.button("์„ ํƒ ์™„๋ฃŒ", use_container_width=True):
            if not st.session_state.temp_selected_cols:
                st.error("์ตœ์†Œ 1๊ฐœ ์ด์ƒ์˜ ๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")
            else:
                st.session_state.personal_info_cols = (
                    st.session_state.temp_selected_cols.copy()
                )
                st.success(
                    f"{len(st.session_state.personal_info_cols)}๊ฐœ ์ปฌ๋Ÿผ์ด ์„ ํƒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค:"
                )
                for col in st.session_state.personal_info_cols:
                    st.write(f"- {col}")

    if st.button("๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ ํ™•์ •"):
        if not st.session_state.temp_selected_cols:
            st.error("์„ ํƒ๋œ ์ปฌ๋Ÿผ์ด ์—†์Šต๋‹ˆ๋‹ค.")
        else:
            st.session_state.personal_info_cols = (
                st.session_state.temp_selected_cols.copy()
            )
            st.success(
                f"์„ ํƒ๋œ ๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ ({len(st.session_state.personal_info_cols)}๊ฐœ): "
                + ", ".join(st.session_state.personal_info_cols)
            )
            st.rerun()


def _survey_combine_files(survey_files, first_df):
    """Clean every uploaded survey and outer-merge them on the personal columns.

    ``first_df`` is the already-parsed first upload; the remaining uploads are
    parsed here, so each file is read once per run.  Files that fail to parse
    or lack a personal-info column are skipped (the latter with a warning).

    Returns ``(per_file_frames, combined_frame)``; ``combined_frame`` is
    ``None`` when no file could be used.
    """
    personal_cols = st.session_state.personal_info_cols
    frames = [first_df] + [
        read_uploaded_file(file, show_success=False) for file in survey_files[1:]
    ]
    number_prefix = re.compile(r"^\d+\.\s*")

    per_file = []
    for file, df in zip(survey_files, frames):
        if df is None:
            continue
        missing_cols = [col for col in personal_cols if col not in df.columns]
        if missing_cols:
            st.warning(
                f"ํŒŒ์ผ '{file.name}'์—์„œ ๋‹ค์Œ ๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ์ด ๋ˆ„๋ฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {', '.join(missing_cols)}"
            )
            continue

        df_clean = pd.DataFrame()
        for col in personal_cols:
            df_clean[col] = df[col]
        for col in df.columns:
            if col not in personal_cols:
                # Question columns lose their leading "<digits>." numbering.
                df_clean[number_prefix.sub("", str(col)).strip()] = df[col]
        per_file.append(df_clean)

    if not per_file:
        return per_file, None

    combined = per_file[0]
    for df in per_file[1:]:
        combined = pd.merge(combined, df, on=personal_cols, how="outer")
    return per_file, combined


def _survey_show_summary(all_surveys):
    """Preview the combined data and list the response count of every file."""
    st.write("### ํ†ตํ•ฉ๋œ ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
    total_responses = len(st.session_state.combined_survey)
    st.write(f"์ด {total_responses}๊ฐœ์˜ ์‘๋‹ต์ด ํ†ตํ•ฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    st.dataframe(st.session_state.combined_survey.head())

    st.write("### ํŒŒ์ผ๋ณ„ ์‘๋‹ต ์ˆ˜")
    # Centre the text of data-frame cells.
    st.markdown(
        """
        <style>
        [data-testid="stDataFrame"] div[data-testid="stDataFrameCell"] {
            justify-content: center;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
    response_data = pd.DataFrame(
        [[len(df) for df in all_surveys]], index=["์‘๋‹ต ์ˆ˜"]
    )
    response_data.columns = [f"ํŒŒ์ผ {i+1}" for i in range(len(all_surveys))]
    st.dataframe(
        response_data,
        column_config={
            col: st.column_config.Column(width=80) for col in response_data.columns
        },
        hide_index=False,
    )


def _survey_apply_group_order(group_order, personal_cols, survey_cols):
    """Reorder the combined frame so the variable groups follow ``group_order``."""
    if not group_order:
        st.error("๋ณ€์ˆ˜ ๊ทธ๋ฃน์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")
        return
    if len(group_order) != 2:
        st.error("๋‘ ๊ทธ๋ฃน์„ ๋ชจ๋‘ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")
        return

    try:
        new_order = []
        for group in group_order:
            if group == _SURVEY_GROUP_PERSONAL:
                new_order.extend(personal_cols)
            elif group == _SURVEY_GROUP_RESPONSES:
                new_order.extend(sorted(survey_cols))

        if not new_order:
            st.error("์ƒˆ๋กœ์šด ์ˆœ์„œ๋ฅผ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            return

        st.session_state.column_order = new_order.copy()
        st.session_state.temp_column_order = new_order.copy()
        st.session_state.current_group_order = group_order

        missing_cols = [
            col
            for col in new_order
            if col not in st.session_state.combined_survey.columns
        ]
        if missing_cols:
            st.error(f"๋‹ค์Œ ์ปฌ๋Ÿผ์ด ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์— ์—†์Šต๋‹ˆ๋‹ค: {missing_cols}")
            return

        st.session_state.combined_survey = st.session_state.combined_survey[new_order]
        st.success("๋ณ€์ˆ˜ ๊ทธ๋ฃน ์ˆœ์„œ๊ฐ€ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
        display_current_order()
        display_data_preview()
    except Exception as e:
        st.error(f"์ˆœ์„œ ๋ณ€๊ฒฝ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")


def _survey_render_bulk_tab(personal_cols, survey_cols):
    """Render the group-level reordering tab and its confirm button."""
    st.write("##### ๋ณ€์ˆ˜ ๊ทธ๋ฃน๋ณ„ ์ˆœ์„œ ์„ค์ •")
    if "current_group_order" not in st.session_state:
        st.session_state.current_group_order = [
            _SURVEY_GROUP_PERSONAL,
            _SURVEY_GROUP_RESPONSES,
        ]

    if st.session_state.combined_survey is not None:
        group_order = st.multiselect(
            "๋ณ€์ˆ˜ ๊ทธ๋ฃน ์ˆœ์„œ๋ฅผ ์„ ํƒํ•˜์„ธ์š”",
            options=[_SURVEY_GROUP_PERSONAL, _SURVEY_GROUP_RESPONSES],
            default=st.session_state.current_group_order,
            help="๋ณ€์ˆ˜ ๊ทธ๋ฃน์˜ ์ˆœ์„œ๋ฅผ ๋“œ๋ž˜๊ทธํ•˜์—ฌ ๋ณ€๊ฒฝํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค",
        )
        st.info(
            f"""
            ํ˜„์žฌ ๋ณ€์ˆ˜ ์„ฑ:
            - ๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜: {len(personal_cols)}๊ฐœ
            - ์„œ๋ฒ ์ด ์‘๋‹ต ๋ณ€์ˆ˜: {len(survey_cols)}๊ฐœ
            """
        )
        if st.button("๊ทธ๋ฃน ์ˆœ์„œ ์ ์šฉ"):
            _survey_apply_group_order(group_order, personal_cols, survey_cols)

    if st.button(
        "ํ˜„์žฌ ๋ณ€์ˆ˜ ์ˆœ์„œ ํ™•์ •",
        type="primary",
        use_container_width=True,
        key="confirm_order_bulk",
    ):
        # column_order is rebuilt from the files on every run, so the order the
        # user applied earlier only survives in temp_column_order.
        confirmed = _survey_current_order()
        st.session_state.final_column_order = confirmed.copy()
        st.session_state.temp_column_order = confirmed.copy()
        st.session_state.column_order = confirmed.copy()
        st.session_state.combined_survey = st.session_state.combined_survey[confirmed]
        st.success("โœ… ๋ณ€์ˆ˜ ์ˆœ์„œ๊ฐ€ ํ™•์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
        st.session_state.show_target_section = True
        st.rerun()


def _survey_render_move_controls():
    """Render the per-variable move widgets of the detail tab."""
    # Start from the order on screen; a stale pending order is replaced.
    st.session_state.temp_column_order = _survey_current_order()
    available_columns = st.session_state.temp_column_order
    if not available_columns:
        available_columns = list(st.session_state.combined_survey.columns)

    reference_candidates = []
    reference_col = None
    move_position = None

    col1, col2 = st.columns([2, 1])
    with col1:
        selected_cols = st.multiselect(
            "์ด๋™ํ•  ๋ณ€์ˆ˜๋“ค์„ ์„ ํƒํ•˜์„ธ์š”",
            options=available_columns,
            format_func=_survey_column_label,
            key="cols_to_move_select",
        )
    with col2:
        st.write("")
        st.write("")
        move_type = st.radio(
            "์ด๋™ ๋ฐฉ์‹ ์„ ํƒ",
            [_SURVEY_MOVE_FIRST, _SURVEY_MOVE_LAST, _SURVEY_MOVE_RELATIVE],
            key="move_type_radio",
        )
        if move_type == _SURVEY_MOVE_RELATIVE and selected_cols:
            reference_candidates = [
                col for col in available_columns if col not in selected_cols
            ]
            if reference_candidates:
                reference_col = st.selectbox(
                    "๊ธฐ์ค€ ๋ณ€์ˆ˜ ์„ ํƒ",
                    options=reference_candidates,
                    format_func=_survey_column_label,
                    key="reference_col_select",
                )
                move_position = st.radio(
                    "์ด๋™ ์œ„์น˜",
                    options=[_SURVEY_POSITION_BEFORE, _SURVEY_POSITION_AFTER],
                    key="position_radio",
                )
            else:
                st.warning("์„ ํƒํ•œ ๋ณ€์ˆ˜๋ฅผ ์ œ์™ธํ•œ ์ด๋™ ๊ฐ€๋Šฅํ•œ ๊ธฐ์ค€ ๋ณ€์ˆ˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        elif move_type == _SURVEY_MOVE_RELATIVE:
            st.info("์ด๋™ํ•  ๋ณ€์ˆ˜๋ฅผ ๋จผ์ € ์„ ํƒํ•ด์ฃผ์„ธ์š”.")

    # A relative move needs at least one column left over to act as reference.
    move_button_disabled = not selected_cols or (
        move_type == _SURVEY_MOVE_RELATIVE and not reference_candidates
    )

    if st.button(
        "์„ ํƒํ•œ ๋ณ€์ˆ˜ ์ด๋™",
        key="move_selected_vars",
        use_container_width=True,
        disabled=move_button_disabled,
    ):
        try:
            new_order = [
                col
                for col in st.session_state.temp_column_order
                if col not in selected_cols
            ]
            if move_type == _SURVEY_MOVE_FIRST:
                new_order = selected_cols + new_order
            elif move_type == _SURVEY_MOVE_LAST:
                new_order = new_order + selected_cols
            elif reference_col is not None:
                ref_idx = new_order.index(reference_col)
                if move_position != _SURVEY_POSITION_BEFORE:
                    ref_idx += 1  # insert after the reference column
                new_order = new_order[:ref_idx] + selected_cols + new_order[ref_idx:]

            st.session_state.temp_column_order = new_order
            st.session_state.combined_survey = st.session_state.combined_survey[
                new_order
            ]
            st.success("โœ… ๋ณ€์ˆ˜ ์ˆœ์„œ๊ฐ€ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
            # The rerun redraws the order and the preview from session state.
            st.rerun()
        except Exception as e:
            st.error(f"๋ณ€์ˆ˜ ์ด๋™ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
    else:
        st.write("### ํ˜„์žฌ ๋ณ€์ˆ˜ ์ˆœ์„œ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
        display_current_order()
        st.write("### ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
        st.dataframe(
            st.session_state.combined_survey[
                st.session_state.temp_column_order
            ].head()
        )


def _survey_render_detail_tab():
    """Render the per-variable reordering tab and its confirm button."""
    st.write("##### ๐Ÿ”„ ๊ฐœ๋ณ„ ๋ณ€์ˆ˜ ์ด๋™")
    if st.session_state.get("combined_survey") is not None:
        _survey_render_move_controls()
    else:
        st.warning("๋จผ์ € ๋ฐ์ดํ„ฐ๋ฅผ ์—…๋กœ๋“œํ•˜๊ณ  ๊ฐœ์ธ์ •๋ณด ์ปฌ๋Ÿผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")

    if st.button(
        "ํ˜„์žฌ ๋ณ€์ˆ˜ ์ˆœ์„œ ํ™•์ •",
        type="primary",
        use_container_width=True,
        key="confirm_order_detail",
    ):
        confirmed = st.session_state.temp_column_order.copy()
        st.session_state.final_column_order = confirmed.copy()
        st.session_state.column_order = confirmed.copy()
        st.session_state.combined_survey = st.session_state.combined_survey[confirmed]
        st.session_state.show_target_section = True
        st.success("โœ… ๋ณ€์ˆ˜ ์ˆœ์„œ๊ฐ€ ํ™•์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
        st.rerun()


def _survey_merge_with_targets(target_df, survey_keys, target_keys, key_prefix):
    """Left-join the combined survey onto the roster and offer the result."""
    survey_df = st.session_state.combined_survey.copy()
    target_df_copy = target_df.copy()

    for col in survey_keys:
        survey_df[col] = survey_df[col].fillna("").apply(_survey_clean_key_value)
    for col in target_keys:
        target_df_copy[col] = (
            target_df_copy[col].fillna("").apply(_survey_clean_key_value)
        )

    survey_df["match_key"] = survey_df[survey_keys].agg("_".join, axis=1)
    target_df_copy["match_key"] = target_df_copy[target_keys].agg("_".join, axis=1)

    final_df = pd.merge(
        target_df_copy,
        survey_df,
        on="match_key",
        how="left",
        indicator=True,
    )

    total_targets = len(target_df_copy)
    matched = len(final_df[final_df["_merge"] == "both"])
    unmatched = len(final_df[final_df["_merge"] == "left_only"])

    col1, col2, col3 = st.columns(3)
    col1.metric("์ „์ฒด ๋Œ€์ƒ์ž ์ˆ˜", total_targets)
    col2.metric("๋งค์นญ๋œ ๋Œ€์ƒ์ž ์ˆ˜", matched)
    col3.metric("๋ฏธ๋งค์นญ ๋Œ€์ƒ์ž ์ˆ˜", unmatched)

    if unmatched > 0:
        st.warning("โš ๏ธ ๋งค์นญ๋˜์ง€ ์•Š์€ ๋ฐ์ดํ„ฐ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค.")
        if st.button("๋ฏธ๋งค์นญ ๋ฐ์ดํ„ฐ ํ™•์ธ", key=f"{key_prefix}_show_unmatched"):
            # Look the rows up in the roster itself: after the merge, key
            # columns that exist in both frames carry _x/_y suffixes.
            unmatched_keys = final_df.loc[
                final_df["_merge"] == "left_only", "match_key"
            ]
            st.dataframe(
                target_df_copy.loc[
                    target_df_copy["match_key"].isin(unmatched_keys), target_keys
                ]
            )

    final_df = final_df.drop(["match_key", "_merge"], axis=1)

    try:
        # Roster columns first, then survey columns in the confirmed order,
        # then whatever is left.
        final_columns = [
            col
            for col in target_df.columns
            if col not in target_keys and col in final_df.columns
        ]
        final_columns.extend(
            col
            for col in st.session_state.final_column_order
            if col in final_df.columns
        )
        final_columns.extend(
            [col for col in final_df.columns if col not in final_columns]
        )
        final_df = final_df[final_columns]
    except Exception as e:
        st.warning(f"์ปฌ๋Ÿผ ์ˆœ์„œ ์žฌ๋ฐฐ์น˜ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
        st.info("๊ธฐ๋ณธ ์ˆœ์„œ๋กœ ๋ฐ์ดํ„ฐ๋ฅผ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")

    st.write("### ์ตœ์ข… ๋ฐ์ดํ„ฐ์…‹ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
    st.dataframe(final_df.head())

    st.download_button(
        label="์ตœ์ข… ํ†ตํ•ฉ ๋ฐ์ดํ„ฐ์…‹ ๋‹ค์šด๋กœ๋“œ",
        data=create_excel_download(final_df),
        file_name="์ตœ์ข…_ํ†ตํ•ฉ_๋ฐ์ดํ„ฐ์…‹.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        key=f"{key_prefix}_download_final",
    )


def _survey_render_target_section(uploader_key, key_prefix):
    """Render the "merge with target roster" section of one tab.

    Both tabs are drawn in the same script run, so every widget here gets a
    key derived from ``key_prefix`` to keep the two copies distinct.
    """
    st.markdown("---")
    st.subheader("3. ๋Œ€์ƒ์ž ๋ช…๋‹จ๊ณผ ํ†ตํ•ฉ")
    st.info("๐Ÿ‘ฅ ํ†ตํ•ฉ๋œ ์„œ๋ฒ ์ด ๋ฐ์ดํ„ฐ์™€ ๋Œ€์ƒ์ž ๋ช…๋‹จ์„ ๋งค์นญํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")

    target_file = st.file_uploader(
        "๋Œ€์ƒ์ž ๋ช…๋‹จ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”",
        type=["xlsx", "xls", "csv"],
        key=uploader_key,
    )
    if not target_file:
        return
    target_df = read_uploaded_file(target_file)
    if target_df is None:
        return

    st.write("### ๋Œ€์ƒ์ž ๋ช…๋‹จ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
    st.dataframe(target_df.head())

    col1, col2 = st.columns(2)
    with col1:
        st.write("### ์„œ๋ฒ ์ด ๋ฐ์ดํ„ฐ ํ‚ค ์„ ํƒ")
        # Confirmed order when there is one, otherwise the live order.
        survey_columns = (
            st.session_state.final_column_order or st.session_state.column_order
        )
        survey_keys = st.multiselect(
            "๋งค์นญํ•  ์„œ๋ฒ ์ด ๋ฐ์ดํ„ฐ์˜ ์ปฌ๋Ÿผ์„ ์„ ํƒํ•˜์„ธ์š”",
            survey_columns,
            max_selections=2,
            help="ํšŒ์‚ฌ, ์‚ฌ๋ฒˆ๊ณผ ๊ฐ™์ด ๊ณ ์œ ํ•œ ์‹๋ณ„์ด ๊ฐ€๋Šฅํ•œ ์ปฌ๋Ÿผ 2๊ฐœ๋ฅผ ์„ ํƒํ•˜์„ธ์š”",
            key=f"{key_prefix}_survey_keys",
        )
    with col2:
        st.write("### ๋Œ€์ƒ์ž ๋ช…๋‹จ ํ‚ค ์„ ํƒ")
        target_keys = st.multiselect(
            "๋งค์นญํ•  ๋Œ€์ƒ์ž ๋ช…๋‹จ์˜ ์ปฌ๋Ÿผ์„ ์„ ํƒํ•˜์„ธ์š”",
            target_df.columns.tolist(),
            max_selections=2,
            help="์„œ๋ฒ ์ด ๋ฐ์ดํ„ฐ์™€ ๋งค์นญ๋  ์ปฌ๋Ÿผ 2๊ฐœ๋ฅผ ์„ ํƒํ•˜์„ธ์š”",
            key=f"{key_prefix}_target_keys",
        )

    if len(survey_keys) != 2 or len(target_keys) != 2:
        return
    try:
        _survey_merge_with_targets(target_df, survey_keys, target_keys, key_prefix)
    except Exception as e:
        st.error(f"๋ฐ์ดํ„ฐ ํ†ตํ•ฉ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")


def survey_integration():
    """Page: combine daily survey files, reorder variables, merge with a roster.

    Flow: upload survey files -> pick the personal-info columns shared by all
    files -> outer-merge the files on those columns -> reorder variables in
    bulk or one by one -> optionally left-join the result onto an uploaded
    target roster and download it as .xlsx.  Any unexpected error is reported
    on the page instead of being raised.

    NOTE(review): the target-roster section is always rendered, and
    ``show_target_section`` is only ever written; confirm whether the section
    should stay hidden until an order has been confirmed.
    """
    try:
        st.header("๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ตํ•ฉ")
        _survey_init_state()

        st.markdown(
            '<p class="custom-subheader">Daily Survey File Upload</p>',
            unsafe_allow_html=True,
        )
        st.info("๋™์ผํ•œ ํ˜•์‹์˜ Daily Survey ํŒŒ์ผ์„ ์ผ์ฐจ๋ณ„๋กœ ์„ ํƒํ•ด์ฃผ์„ธ์š”")
        survey_files = st.file_uploader(
            "๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํŒŒ์ผ๋“ค์„ ์—…๋กœ๋“œํ•˜์„ธ์š”",
            type=["xlsx", "xls", "csv"],
            accept_multiple_files=True,
            key="survey_files_upload",
        )
        if not survey_files:
            return

        first_df = read_uploaded_file(survey_files[0])
        if first_df is None:
            return

        st.write("### First Data Preview")
        st.dataframe(first_df.head())
        _survey_select_personal_columns(first_df)

        if not st.session_state.personal_info_cols:
            return

        with st.spinner("์„œ๋ฒ ์ด ํŒŒ์ผ ์ฒ˜๋ฆฌ ์ค‘..."):
            all_surveys, combined_df = _survey_combine_files(survey_files, first_df)
        if combined_df is None:
            return

        personal_cols = st.session_state.personal_info_cols
        # Take the question columns from the merged frame itself so the order
        # never names a column the merge did not produce (skipped files,
        # _x/_y suffixes for names shared between files).
        survey_cols = sorted(
            col for col in combined_df.columns if col not in personal_cols
        )
        st.session_state.column_order = personal_cols + survey_cols
        st.session_state.combined_survey = combined_df[st.session_state.column_order]
        st.success(f"์ด {len(all_surveys)}๊ฐœ์˜ ์„œ๋ฒ ์ด ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์ฒ˜๋ฆฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

        _survey_show_summary(all_surveys)

        st.subheader("2. ๋ณ€์ˆ˜ ์ˆœ์„œ ์กฐ์ •")
        st.write("### ๋ณ€์ˆ˜ ์ˆœ์„œ ๊ด€๋ฆฌ")
        tab1, tab2 = st.tabs(["๐Ÿ“Š ์ผ๊ด„ ๋ณ€๊ฒฝ", "๐Ÿ”„ ์„ธ๋ถ€ ์กฐ์ •"])
        with tab1:
            _survey_render_bulk_tab(personal_cols, survey_cols)
            _survey_render_target_section("target_file_upload_detail", "bulk")
        with tab2:
            _survey_render_detail_tab()
            _survey_render_target_section("target_file_upload_detail_tab2", "detail")
    except Exception as e:
        st.error(f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
def _iter_pptx_shape_texts(shapes):
    """Yield the stripped, non-empty text of every shape, descending into groups."""
    for shape in shapes:
        if hasattr(shape, "text"):
            text = shape.text.strip()
            if text:
                yield text
        elif shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            # Group shapes expose no ``text`` of their own; their content
            # lives in the member shapes (which may be groups themselves).
            yield from _iter_pptx_shape_texts(shape.shapes)


def extract_specific_text(file):
    """Collect the text of all shapes in a PowerPoint file, slide by slide.

    Args:
        file: Path or file-like object accepted by ``Presentation``.

    Returns:
        A flat list of stripped, non-empty shape texts in slide order, or an
        empty list (after showing an error) when the file cannot be processed.
    """
    try:
        prs = Presentation(file)
        extracted_texts = []
        for slide in prs.slides:
            extracted_texts.extend(_iter_pptx_shape_texts(slide.shapes))
        return extracted_texts
    except Exception as e:
        st.error(f"PPT ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        return []
def clean_text_for_excel(text):
    """Flatten a string to a single line with single spaces.

    Line breaks, carriage returns and tabs become spaces, every run of
    whitespace collapses to one space, and the ends are trimmed.  Values that
    are not strings are returned untouched.
    """
    if isinstance(text, str):
        flattened = text.translate(str.maketrans("\n\r\t", "   "))
        return re.sub(r"\s+", " ", flattened).strip()
    return text
def _classify_texts_by_question(texts, questions, fallback_label):
    """Bucket ``texts`` under the question heading that most recently preceded them.

    A text that contains one of ``questions`` is treated as a heading: it
    switches the current bucket and is not stored itself. Texts seen before
    any heading go to ``fallback_label``.

    Args:
        texts: Texts in document order.
        questions: Non-empty question strings; the first one contained in a
            text wins.
        fallback_label: Key of the bucket for texts that precede any heading.

    Returns:
        A dict mapping every question and ``fallback_label`` to a list of texts.
    """
    buckets = {question: [] for question in questions}
    buckets[fallback_label] = []
    current = None
    for text in texts:
        heading = next((q for q in questions if q in text), None)
        if heading is not None:
            current = heading
        elif current is not None:
            buckets[current].append(text)
        else:
            buckets[fallback_label].append(text)
    return buckets


def extract_responses():
    """Streamlit page: extract text from uploaded PPTX files and sort it by question.

    Workflow: upload one or more ``.pptx`` files, show the text extracted from
    each, let the user list texts to exclude, let the user list questions
    (one per line, each becoming a column), and offer the classified table as
    an Excel download. Any error is reported through ``st.error``.
    """
    st.header("PPT ์ฃผ์š”์‘๋‹ต ์ถ”์ถœ ๋ฐ ๋ถ„๋ฅ˜")

    uploaded_files = st.file_uploader(
        "PPT ํŒŒ์ผ์„ ํƒํ•˜์„ธ์š”",
        type=["pptx"],
        accept_multiple_files=True,
        key="ppt_files",
    )
    if not uploaded_files:
        return

    try:
        all_texts = []
        progress_bar = st.progress(0)

        # Extract and normalise the text of every uploaded presentation.
        for i, file in enumerate(uploaded_files):
            texts = [clean_text_for_excel(t) for t in extract_specific_text(file)]
            if texts:
                with st.expander(f"ํŒŒ์ผ: {file.name} ์ถ”์ถœ ๊ฒฐ๊ณผ"):
                    for text in texts:
                        st.write(text)
                all_texts.extend(texts)
            progress_bar.progress((i + 1) / len(uploaded_files))

        if not all_texts:
            return

        st.success(f"์ด {len(all_texts)}๊ฐœ์˜ ํ…์ŠคํŠธ๊ฐ€ ์ถ”์ถœ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")

        # --- Exclusion list -------------------------------------------------
        st.subheader("์ œ์™ธํ•  ํ…์ŠคํŠธ ์„ค์ •")
        excluded_texts_input = st.text_area(
            "์ œ์™ธํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š” (์ค„ ๋ฐ”๊ฟˆ์œผ๋กœ ๊ตฌ๋ถ„)",
            help="์ œ์™ธํ•  ํ…์ŠคํŠธ๋ฅผ ํ•œ ์ค„์”ฉ ์ž…๋ ฅํ•˜์„ธ์š”",
        )
        # Compare like with like: the user copies the *cleaned* text shown
        # above, so the exclusion entries are cleaned the same way. Comparing
        # against the raw text made multi-line entries impossible to exclude.
        excluded_texts = {
            clean_text_for_excel(line)
            for line in excluded_texts_input.splitlines()
            if line.strip()
        }
        filtered_texts = [text for text in all_texts if text not in excluded_texts]

        st.write("### ํ•„ํ„ฐ๋ง๋œ ํ…์ŠคํŠธ:")
        for text in filtered_texts:
            st.write(text)

        # --- Classification by question --------------------------------------
        st.subheader("์งˆ๋ฌธ๋ณ„ ๋ถ„๋ฅ˜")
        user_questions_input = st.text_area(
            "์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š” (์ค„๋ฐ”๊ฟˆ์œผ๋กœ ๊ตฌ๋ถ„)",
            help="๊ฐ ์งˆ๋ฌธ์ด ํ•˜๋‚˜์˜ ์—ด(ํ—ค๋”)์ด ๋ฉ๋‹ˆ๋‹ค",
        )
        # Blank lines must be dropped: an empty question is a substring of
        # every text, so it would swallow all texts as "headings" and leave
        # every column empty.
        user_questions = [
            clean_text_for_excel(line)
            for line in user_questions_input.splitlines()
            if line.strip()
        ]
        if not user_questions:
            return

        question_responses = _classify_texts_by_question(
            filtered_texts, user_questions, "๊ธฐํƒ€"
        )

        # Pad every column to the same length so the dict forms a rectangle.
        max_rows = max(len(responses) for responses in question_responses.values())
        for responses in question_responses.values():
            responses.extend([""] * (max_rows - len(responses)))

        df = pd.DataFrame(question_responses)
        st.write("### ์งˆ๋ฌธ๋ณ„ ๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ:")
        st.dataframe(df)

        # Every cell was already passed through clean_text_for_excel, so the
        # frame is written as-is (no per-cell applymap pass is required).
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine="openpyxl") as writer:
            df.to_excel(writer, index=False)

        st.download_button(
            label="๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ",
            data=output.getvalue(),
            file_name="์งˆ๋ฌธ๋ณ„_๋ถ„๋ฅ˜_๊ฒฐ๊ณผ.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    except Exception as e:
        st.error(f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
def _gradient_colors(n, color_type="blue"):
    """Return ``n`` rgba colour strings of a single hue with rising opacity.

    ``color_type == "blue"`` selects a sky-blue base colour; any other value
    selects grey. Opacity grows linearly from 0.3 to 1.0 across the list; when
    only one colour is requested it is fully opaque.
    """
    red, green, blue = (186, 225, 255) if color_type == "blue" else (200, 200, 200)
    colors = []
    for i in range(n):
        opacity = 0.3 + (0.7 * i / (n - 1)) if n > 1 else 1
        colors.append(f"rgba({red}, {green}, {blue}, {opacity})")
    return colors


def _group_subject_columns(columns):
    """Group column labels by the subject name written in square brackets.

    Only string labels containing a ``[name]`` pair are considered. Labels
    that are not strings, or that hold the brackets in the wrong order (``]``
    before ``[``), are skipped instead of raising.

    Returns:
        A dict mapping subject name to the list of its columns, in the order
        the columns were given.
    """
    groups = {}
    for col in columns:
        if not isinstance(col, str):
            continue
        match = re.search(r"\[(.*?)\]", col)
        if match is None:
            continue
        groups.setdefault(match.group(1), []).append(col)
    return groups


def _grouped_bar_layout(title, xaxis_title, tick_labels):
    """Build the layout keyword arguments shared by both grouped bar charts."""
    return dict(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title="ํ‰๊ท  ์ ์ˆ˜",
        barmode="group",
        bargap=0.15,
        bargroupgap=0.1,
        uniformtext_minsize=8,
        uniformtext_mode="hide",
        # Rotated tick labels, one per category position.
        xaxis=dict(
            tickangle=45,
            tickmode="array",
            ticktext=tick_labels,
            tickvals=list(range(len(tick_labels))),
            tickfont=dict(size=12),
        ),
        # Fixed axis from 0 to 5 (presumably a 5-point survey scale -- confirm).
        yaxis=dict(range=[0, 5]),
        # Extra bottom room for the rotated labels, top room for the legend.
        margin=dict(b=150, t=100),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
        ),
    )


def analyze_survey_data():
    """Streamlit page: descriptive statistics for an integrated survey file.

    After a file is uploaded and loaded through ``read_uploaded_file``, the
    page

    1. groups the columns whose label contains ``[subject]`` by subject and
       shows the mean score per subject (table and bar chart, with the highest
       and lowest bars highlighted), in the order the user picks;
    2. for every other column the user selects, shows the subject means per
       group of that column as a table and two grouped bar charts.

    The function returns early when no file is uploaded, when loading yields
    ``None``, when no subject column is found, or when no subject is selected.
    """
    total_label = "์ „์ฒด"

    st.header("๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ต๊ณ„ ๋ถ„์„")
    uploaded_file = st.file_uploader(
        "ํ†ต๊ณ„ ๋ถ„์„ํ•  ํ†ตํ•ฉ ๋ฐ์ดํ„ฐ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”",
        type=["xlsx", "xls", "csv"],
        key="stats_file_upload",
    )
    if not uploaded_file:
        return

    df = read_uploaded_file(uploaded_file)
    if df is None:
        return
    st.success("๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ!")

    # Identify subject columns and group them by the bracketed subject name.
    subject_groups = _group_subject_columns(df.columns)
    if not subject_groups:
        st.warning("๊ณผ๋ชฉ ๊ด€๋ จ ๋ณ€์ˆ˜([๊ณผ๋ชฉ๋ช…])๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        return
    subject_vars = {col for cols in subject_groups.values() for col in cols}

    # Per subject: mean over its columns for each row, then mean over rows.
    subject_means = {
        subject: df[columns].mean(axis=1).mean()
        for subject, columns in subject_groups.items()
    }

    # ------------------------------------------------------------------
    # 1. Overall mean score per subject
    # ------------------------------------------------------------------
    st.subheader("1. ์ „์ฒด ๊ณผ๋ชฉ๋ณ„ ํ‰๊ท  ์ ์ˆ˜")
    subject_order = st.multiselect(
        "๊ณผ๋ชฉ ์ˆœ์„œ๋ฅผ ์„ ํƒํ•˜์„ธ์š”",
        options=list(subject_means.keys()),
        default=list(subject_means.keys()),
    )
    # Everything below depends on the selected subjects (``ordered_means``),
    # so there is nothing to render when the selection is empty.
    if not subject_order:
        return

    ordered_means = {subject: float(subject_means[subject]) for subject in subject_order}
    # The overall figure is the mean of the selected subject means.
    ordered_means[total_label] = sum(ordered_means.values()) / len(ordered_means)

    # Table shows two-decimal strings; the chart keeps the float values.
    display_means = {k: f"{v:.2f}" for k, v in ordered_means.items()}
    st.dataframe(pd.DataFrame([display_means], index=["ํ‰๊ท ์ ์ˆ˜"]))

    values = list(ordered_means.values())
    max_val = max(values)
    min_val = min(values)
    # Highest bar(s) green, lowest bar(s) red, the rest sky blue.
    colors = [
        "#BAFFC9" if v == max_val else "#FF9B9B" if v == min_val else "#BAE1FF"
        for v in values
    ]

    fig = go.Figure(
        data=[
            go.Bar(
                x=list(ordered_means.keys()),
                y=values,
                text=[f"{v:.2f}" for v in values],
                textposition="outside",
                marker_color=colors,
                textfont=dict(size=12),
            )
        ]
    )
    fig.update_layout(
        title="์ „์ฒด ๊ณผ๋ชฉ๋ณ„ ํ‰๊ท  ์ ์ˆ˜",
        xaxis_title="๊ณผ๋ชฉ",
        yaxis_title="ํ‰๊ท  ์ ์ˆ˜",
        showlegend=False,
        bargap=0.3,
        uniformtext_minsize=8,
        uniformtext_mode="hide",
    )
    st.plotly_chart(fig)

    # ------------------------------------------------------------------
    # 2. Subject means broken down by the selected non-subject columns
    # ------------------------------------------------------------------
    personal_info_vars = [col for col in df.columns if col not in subject_vars]
    selected_personal_vars = st.multiselect(
        "๋ถ„์„์— ์‚ฌ์šฉํ•  ๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜๋ฅผ ์„ ํƒํ•˜์„ธ์š”",
        options=personal_info_vars,
        help="๊ณผ๋ชฉ๋ณ„ ํ‰๊ท  ์ ์ˆ˜ ๋น„๊ต์— ์‚ฌ์šฉํ•  ๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜๋“ค์„ ์„ ํƒํ•˜์„ธ์š”",
    )
    if not selected_personal_vars:
        return

    st.subheader("2. ๊ฐœ์ธ์ •๋ณด ๋ณ€์ˆ˜๋ณ„ ๊ณผ๋ชฉ ํ‰๊ท  ๋ถ„์„")

    # One row per respondent: mean score per subject, overall mean across the
    # selected subjects, plus the selected grouping columns.
    subject_scores = pd.DataFrame()
    for subject in subject_order:
        subject_scores[subject] = df[subject_groups[subject]].mean(axis=1)
    subject_scores[total_label] = subject_scores[subject_order].mean(axis=1)
    for var in selected_personal_vars:
        subject_scores[var] = df[var]

    ordered_cols = list(ordered_means.keys())

    for var in selected_personal_vars:
        st.write(f"#### {var}์— ๋”ฐ๋ฅธ ๊ณผ๋ชฉ๋ณ„ ํ‰๊ท  ์ ์ˆ˜")

        grouped_means = subject_scores.groupby(var)[ordered_cols].mean().round(2)

        st.write("๊ทธ๋ฃน๋ณ„ ํ‰๊ท  ์ ์ˆ˜:")
        st.dataframe(grouped_means)

        # Chart 1: subjects on the x axis, one bar series per group of ``var``.
        fig1 = go.Figure()
        category_colors = _gradient_colors(len(grouped_means.index))
        for i, category in enumerate(grouped_means.index):
            category_values = grouped_means.loc[category]
            fig1.add_trace(
                go.Bar(
                    name=category,
                    x=ordered_cols,
                    y=category_values,
                    text=[f"{v:.2f}" for v in category_values],
                    textposition="outside",
                    marker_color=category_colors[i],
                    textfont=dict(size=12),
                )
            )
        fig1.update_layout(
            **_grouped_bar_layout(
                title=f"{var}๋ณ„ ๊ณผ๋ชฉ ํ‰๊ท  ์ ์ˆ˜",
                xaxis_title="๊ณผ๋ชฉ",
                tick_labels=ordered_cols,
            )
        )
        st.plotly_chart(fig1)

        # Chart 2: groups of ``var`` on the x axis, one bar series per subject.
        fig2 = go.Figure()
        subject_colors = _gradient_colors(len(ordered_cols), color_type="gray")
        for i, subject in enumerate(ordered_cols):
            subject_values = grouped_means[subject]
            hover_texts = [
                f"{var}: {category_name}<br>{subject}: {value:.2f}"
                for category_name, value in zip(grouped_means.index, subject_values)
            ]
            fig2.add_trace(
                go.Bar(
                    name=subject,
                    x=grouped_means.index,
                    y=subject_values,
                    text=[f"{v:.2f}" for v in subject_values],
                    textposition="outside",
                    marker_color=subject_colors[i],
                    textfont=dict(size=12, color="black"),
                    hovertext=hover_texts,
                    hoverinfo="text",
                )
            )
        fig2.update_layout(
            **_grouped_bar_layout(
                title=f"{var}๋ณ„ ๊ณผ๋ชฉ ํ‰๊ท  ์ ์ˆ˜ (๊ณผ๋ชฉ๋ณ„ ๋ฒ”๋ก€)",
                xaxis_title=var,
                tick_labels=grouped_means.index,
            )
        )
        st.plotly_chart(fig2)
def main():
    """Application entry point.

    Configures the Streamlit page, injects the custom CSS, renders the centred
    title banner, draws the sidebar menu and finally renders whichever page
    ``st.session_state.current_menu`` currently names.
    """
    page_menu_items = {
        "Get Help": None,
        "Report a bug": None,
        "About": "ยฉ 2024 SK mySUNI",
    }
    st.set_page_config(
        page_title="์•„์นด๋ฐ๋ฏธ ๋ฐ์ดํ„ฐ ํ†ตํ•ฉ ํ”„๋กœ๊ทธ๋žจ",
        page_icon="๐Ÿ“Š",
        layout="wide",
        initial_sidebar_state="expanded",
        menu_items=page_menu_items,
    )

    # Apply the custom CSS.
    local_css()

    # Title banner. The markup is kept flush-left so the string passed to
    # st.markdown is exactly the one shown in the source.
    banner_html = """
<div style='text-align: center; margin-bottom: 2rem;'>
<h1>์•„์นด๋ฐ๋ฏธ ๋ฐ์ดํ„ฐ ํ†ตํ•ฉ ํ”„๋กœ๊ทธ๋žจ</h1>
<p style='color: #666; font-size: 1.2em;'>๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ตํ•ฉ | ๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ต๊ณ„ | PPT ์ฃผ์š”์‘๋‹ต ์ถ”์ถœ</p>
</div>
"""
    st.markdown(banner_html, unsafe_allow_html=True)

    # Sidebar menu; the selection is read back from the session state below.
    sidebar_menu()

    # Page routing: menu label -> page renderer.
    pages = {
        "1) ๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ตํ•ฉ": survey_integration,
        "2) ๋ฐ์ผ๋ฆฌ ์„œ๋ฒ ์ด ํ†ต๊ณ„": analyze_survey_data,
        "3) PPT ์ฃผ์š”์‘๋‹ต ์ถ”์ถœ": extract_responses,
    }
    render_page = pages.get(st.session_state.current_menu)
    if render_page is not None:
        render_page()
# Script entry point, placed at the very end of app.py: start the app only
# when this file is executed directly.
if __name__ == "__main__":
    main()
"""
Copyright ยฉ 2024 SK mySUNI
Created by ๋ฐฐ์ˆ˜์ •RF (soojeong.bae@sk.com)
All Rights Reserved.
"""