Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,8 +17,8 @@ def set_font():
|
|
| 17 |
# 폰트 설정을 가져옵니다
|
| 18 |
font_settings = set_font()
|
| 19 |
|
| 20 |
-
# 세션 상태 초기화
|
| 21 |
-
def
|
| 22 |
if 'data' not in st.session_state:
|
| 23 |
st.session_state.data = None
|
| 24 |
if 'processed_data' not in st.session_state:
|
|
@@ -33,6 +33,8 @@ def init_session_state():
|
|
| 33 |
st.session_state.y_var = None
|
| 34 |
if 'slicers' not in st.session_state:
|
| 35 |
st.session_state.slicers = {}
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# 데이터 로드
|
| 38 |
@st.cache_data
|
|
@@ -48,29 +50,27 @@ def load_data(file):
|
|
| 48 |
return data
|
| 49 |
|
| 50 |
def manual_data_entry():
|
| 51 |
-
st.
|
| 52 |
-
col_names = st.text_input("열 이름을 쉼표로 구분하여 입력하세요:").split(',')
|
| 53 |
col_names = [name.strip() for name in col_names if name.strip()]
|
| 54 |
|
| 55 |
if col_names:
|
| 56 |
-
num_rows = st.number_input("초기 행의 수를 입력하세요:", min_value=1, value=5)
|
| 57 |
data = pd.DataFrame(columns=col_names, index=range(num_rows))
|
| 58 |
|
| 59 |
-
edited_data = st.data_editor(data, num_rows="dynamic")
|
| 60 |
|
| 61 |
return edited_data
|
| 62 |
return None
|
| 63 |
|
| 64 |
def preprocess_data(data):
|
| 65 |
-
st.subheader("데이터 전처리")
|
| 66 |
-
|
| 67 |
# 결측치 처리
|
| 68 |
if data.isnull().sum().sum() > 0:
|
| 69 |
st.write("결측치 처리:")
|
| 70 |
for column in data.columns:
|
| 71 |
if data[column].isnull().sum() > 0:
|
| 72 |
method = st.selectbox(f"{column} 열의 처리 방법 선택:",
|
| 73 |
-
["제거", "평균으로 대체", "중앙값으로 대체", "최빈값으로 대체"]
|
|
|
|
| 74 |
if method == "제거":
|
| 75 |
data = data.dropna(subset=[column])
|
| 76 |
elif method == "평균으로 대체":
|
|
@@ -101,7 +101,8 @@ def create_slicers(data):
|
|
| 101 |
st.session_state.slicers[col] = st.multiselect(
|
| 102 |
f"{col} 선택",
|
| 103 |
options=sorted(data[col].unique()),
|
| 104 |
-
default=sorted(data[col].unique())
|
|
|
|
| 105 |
)
|
| 106 |
|
| 107 |
def apply_slicers(data):
|
|
@@ -150,12 +151,12 @@ def plot_scatter_with_regression(data, x_var, y_var):
|
|
| 150 |
st.write(f"p-value: {p_value:.4f}")
|
| 151 |
st.write(f"표준 오차: {std_err:.4f}")
|
| 152 |
|
| 153 |
-
def perform_analysis(
|
| 154 |
st.header("탐색적 데이터 분석")
|
| 155 |
|
| 156 |
# 슬라이서 생성 및 적용
|
| 157 |
-
create_slicers(
|
| 158 |
-
filtered_data = apply_slicers(
|
| 159 |
|
| 160 |
# 요약 통계
|
| 161 |
st.write("요약 통계:")
|
|
@@ -167,36 +168,38 @@ def perform_analysis(data):
|
|
| 167 |
|
| 168 |
# 사용자가 선택한 두 변수에 대한 산점도 및 회귀 분석
|
| 169 |
st.subheader("두 변수 간의 관계 분석")
|
| 170 |
-
x_var = st.selectbox("X축 변수 선택", options=st.session_state.numeric_columns, key='x_var')
|
| 171 |
-
y_var = st.selectbox("Y축 변수 선택", options=[col for col in st.session_state.numeric_columns if col != x_var], key='y_var')
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
| 174 |
-
plot_scatter_with_regression(filtered_data, x_var, y_var)
|
| 175 |
|
| 176 |
def main():
|
| 177 |
st.title("인터랙티브 EDA 툴킷")
|
| 178 |
|
| 179 |
-
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
| 187 |
else:
|
| 188 |
-
st.session_state.data =
|
| 189 |
-
else:
|
| 190 |
-
st.session_state.data = manual_data_entry()
|
| 191 |
|
| 192 |
if st.session_state.data is not None:
|
| 193 |
st.subheader("데이터 미리보기 및 수정")
|
| 194 |
st.write("데이터를 확인하고 필요한 경우 수정하세요:")
|
| 195 |
-
edited_data = st.data_editor(st.session_state.data, num_rows="dynamic")
|
| 196 |
|
| 197 |
-
if st.button("데이터 분석 시작"):
|
| 198 |
-
st.session_state.
|
| 199 |
-
|
|
|
|
| 200 |
|
| 201 |
if __name__ == "__main__":
|
| 202 |
main()
|
|
|
|
| 17 |
# 폰트 설정을 가져옵니다
|
| 18 |
font_settings = set_font()
|
| 19 |
|
| 20 |
+
# 세션 상태 초기화 및 관리
|
| 21 |
+
def manage_session_state():
|
| 22 |
if 'data' not in st.session_state:
|
| 23 |
st.session_state.data = None
|
| 24 |
if 'processed_data' not in st.session_state:
|
|
|
|
| 33 |
st.session_state.y_var = None
|
| 34 |
if 'slicers' not in st.session_state:
|
| 35 |
st.session_state.slicers = {}
|
| 36 |
+
if 'analysis_performed' not in st.session_state:
|
| 37 |
+
st.session_state.analysis_performed = False
|
| 38 |
|
| 39 |
# 데이터 로드
|
| 40 |
@st.cache_data
|
|
|
|
| 50 |
return data
|
| 51 |
|
| 52 |
def manual_data_entry():
|
| 53 |
+
col_names = st.text_input("열 이름을 쉼표로 구분하여 입력하세요:", key="manual_col_names").split(',')
|
|
|
|
| 54 |
col_names = [name.strip() for name in col_names if name.strip()]
|
| 55 |
|
| 56 |
if col_names:
|
| 57 |
+
num_rows = st.number_input("초기 행의 수를 입력하세요:", min_value=1, value=5, key="manual_num_rows")
|
| 58 |
data = pd.DataFrame(columns=col_names, index=range(num_rows))
|
| 59 |
|
| 60 |
+
edited_data = st.data_editor(data, num_rows="dynamic", key="manual_data_editor")
|
| 61 |
|
| 62 |
return edited_data
|
| 63 |
return None
|
| 64 |
|
| 65 |
def preprocess_data(data):
|
|
|
|
|
|
|
| 66 |
# 결측치 처리
|
| 67 |
if data.isnull().sum().sum() > 0:
|
| 68 |
st.write("결측치 처리:")
|
| 69 |
for column in data.columns:
|
| 70 |
if data[column].isnull().sum() > 0:
|
| 71 |
method = st.selectbox(f"{column} 열의 처리 방법 선택:",
|
| 72 |
+
["제거", "평균으로 대체", "중앙값으로 대체", "최빈값으로 대체"],
|
| 73 |
+
key=f"missing_{column}")
|
| 74 |
if method == "제거":
|
| 75 |
data = data.dropna(subset=[column])
|
| 76 |
elif method == "평균으로 대체":
|
|
|
|
| 101 |
st.session_state.slicers[col] = st.multiselect(
|
| 102 |
f"{col} 선택",
|
| 103 |
options=sorted(data[col].unique()),
|
| 104 |
+
default=sorted(data[col].unique()),
|
| 105 |
+
key=f"slicer_{col}"
|
| 106 |
)
|
| 107 |
|
| 108 |
def apply_slicers(data):
|
|
|
|
| 151 |
st.write(f"p-value: {p_value:.4f}")
|
| 152 |
st.write(f"표준 오차: {std_err:.4f}")
|
| 153 |
|
| 154 |
+
def perform_analysis():
|
| 155 |
st.header("탐색적 데이터 분석")
|
| 156 |
|
| 157 |
# 슬라이서 생성 및 적용
|
| 158 |
+
create_slicers(st.session_state.processed_data)
|
| 159 |
+
filtered_data = apply_slicers(st.session_state.processed_data)
|
| 160 |
|
| 161 |
# 요약 통계
|
| 162 |
st.write("요약 통계:")
|
|
|
|
| 168 |
|
| 169 |
# 사용자가 선택한 두 변수에 대한 산점도 및 회귀 분석
|
| 170 |
st.subheader("두 변수 간의 관계 분석")
|
| 171 |
+
st.session_state.x_var = st.selectbox("X축 변수 선택", options=st.session_state.numeric_columns, key='x_var')
|
| 172 |
+
st.session_state.y_var = st.selectbox("Y축 변수 선택", options=[col for col in st.session_state.numeric_columns if col != st.session_state.x_var], key='y_var')
|
| 173 |
+
|
| 174 |
+
if st.session_state.x_var and st.session_state.y_var:
|
| 175 |
+
plot_scatter_with_regression(filtered_data, st.session_state.x_var, st.session_state.y_var)
|
| 176 |
|
| 177 |
+
st.session_state.analysis_performed = True
|
|
|
|
| 178 |
|
| 179 |
def main():
|
| 180 |
st.title("인터랙티브 EDA 툴킷")
|
| 181 |
|
| 182 |
+
manage_session_state()
|
| 183 |
|
| 184 |
+
if not st.session_state.data:
|
| 185 |
+
data_input_method = st.radio("데이터 입력 방법 선택:", ("파일 업로드", "수동 입력"), key="data_input_method")
|
| 186 |
+
|
| 187 |
+
if data_input_method == "파일 업로드":
|
| 188 |
+
uploaded_file = st.file_uploader("CSV, XLS, 또는 XLSX 파일을 선택하세요", type=["csv", "xls", "xlsx"], key="file_uploader")
|
| 189 |
+
if uploaded_file:
|
| 190 |
+
st.session_state.data = load_data(uploaded_file)
|
| 191 |
else:
|
| 192 |
+
st.session_state.data = manual_data_entry()
|
|
|
|
|
|
|
| 193 |
|
| 194 |
if st.session_state.data is not None:
|
| 195 |
st.subheader("데이터 미리보기 및 수정")
|
| 196 |
st.write("데이터를 확인하고 필요한 경우 수정하세요:")
|
| 197 |
+
edited_data = st.data_editor(st.session_state.data, num_rows="dynamic", key="data_editor")
|
| 198 |
|
| 199 |
+
if st.button("데이터 분석 시작", key="start_analysis") or st.session_state.analysis_performed:
|
| 200 |
+
if not st.session_state.analysis_performed:
|
| 201 |
+
st.session_state.processed_data = preprocess_data(edited_data)
|
| 202 |
+
perform_analysis()
|
| 203 |
|
| 204 |
if __name__ == "__main__":
|
| 205 |
main()
|