| import streamlit as st |
| import seaborn as sns |
| import matplotlib.pyplot as plt |
| import matplotlib.patheffects as path_effects |
| import pandas as pd |
| import numpy as np |
|
|
| |
# Page title, a sub-header, and two tabs bound to tab1 / tab2.
# NOTE(review): the string literals in this script appear mis-encoded and some
# are split across physical lines; verify the file encoding before editing them.
| st.title("๐ ๋ฐ์ดํฐ ์๊ฐํ ๋ฐ ๋ถ์") |
|
|
| |
| st.subheader("1๏ธโฃ ๋ฐ์ดํฐ ๋ถ๋ฌ์ค๊ธฐ") |
| tab1, tab2 = st.tabs(["seaborn ๋ฐ์ดํฐ์
", "ํ์ผ ์
๋ก๋"]) |
|
|
| |
# First tab: read a dataset name from a text input and, once the checkbox is
# ticked, load it with sns.load_dataset and show its first three rows.
# NOTE(review): `df` is bound on this path only when the checkbox is ticked and
# the load succeeds.
| with tab1: |
| dataset_name = st.text_input('๋ฐ์ดํฐ ์์: titanic, tips, taxis, penguins, iris...:') |
| sample_checked = st.checkbox('seaborn ๋ฐ์ดํฐ ํ์ธํ๊ธฐ') |
|
|
| if sample_checked: |
| with st.spinner('์ํ ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ์ค ์
๋๋ค...'): |
# NOTE(review): the bare `except:` reports every failure with the same message;
# narrowing it would keep unrelated errors visible.
| try: |
| df = sns.load_dataset(dataset_name) |
| st.write(df.head(3)) |
| except: |
| st.write("โ ๋ฐ์ดํฐ์
์ด๋ฆ์ ๋ค์ ํ์ธํด์ฃผ์ธ์!") |
|
|
# Second tab: accept a CSV upload, parse it with pandas (utf-8) and store the
# resulting frame in st.session_state['custom_data'].
# `custom_data` is rebound from the uploader's return value to the parsed frame.
# NOTE(review): nothing in the visible code reads st.session_state['custom_data'] back.
| with tab2: |
| st.write("๋จ, ์ด ๋ฐฉ๋ฒ์ csv ํ์ผ๋ง ์ง์ํฉ๋๋ค.") |
| custom_data = st.file_uploader("๋ถ์ํ๊ณ ์ถ์ ํ์ผ์ ์
๋ก๋ํด์ฃผ์ธ์.", type="csv") |
| if custom_data: |
| custom_data = pd.read_csv(custom_data, encoding = 'utf-8') |
| st.session_state['custom_data'] = custom_data |
|
|
# When the checkbox is ticked, preview the first five rows and bind `df`.
| upload_checked = st.checkbox('์
๋ก๋ํ ํ์ผ ํ์ธํ๊ธฐ!') |
| if upload_checked: |
| with st.spinner('์ค๋ณต์ ํ์ธํ๋ ์ค์
๋๋ค...'): |
# NOTE(review): if no file was uploaded, `custom_data` is presumably None here,
# so `.head` fails and the bare `except:` reports it as an upload problem;
# confirm this is the intended guard, and consider narrowing the handler.
| try: |
| st.write(custom_data.head(5)) |
| df = custom_data |
| except: |
| st.write("โ ์ฌ๋ฐ๋ฅธ ํ์ผ์ ์
๋ก๋ํ์
จ๋์ง ํ์ธํด์ฃผ์ธ์!") |
|
|
|
|
| |
# Column picker: list the frame's columns, let the user choose a subset
# (all columns selected by default) and preview three rows of the selection.
# NOTE(review): `df` is assigned only inside the two checkbox branches earlier in
# the script, so this line looks like it raises NameError when neither branch
# ran; confirm whether a guard (for example st.stop()) is intended.
| column_names = df.columns.tolist() |
|
|
| |
| selected_columns = st.multiselect('๋ถ์ํ๊ณ ์ ํ๋ ์ด์ ์ ํํ์ธ์:', column_names, default=column_names) |
|
|
| |
| if selected_columns: |
| st.write(df[selected_columns].head(3)) |
|
|
# A second pair of tabs; this rebinds the names tab1 / tab2 used earlier.
| tab1, tab2 = st.tabs(["๊ธฐ์ ํต๊ณ๋", "๋ฐ์ดํฐ ์๊ฐํ"]) |
|
|
# The first of the new tabs only writes a heading in the visible code.
| with tab1: |
| st.write('# ๊ธฐ์ ํต๊ณ๋') |
| |
|
|
# Second of the new tabs: a two-option radio whose value selects one of the
# univariate plotting branches further down.
# NOTE(review): indentation is not visible in this view, so how much of the
# following code belongs to this `with` body cannot be confirmed here.
| with tab2: |
| |
| variable_type = st.radio("์ผ๋ณ๋ ๋ฐ์ดํฐ๋ฅผ ์ ํํด์ฃผ์ธ์.", ("์์นํ", "๋ฒ์ฃผํ")) |
|
|
def get_slider_step(min_value, max_value):
    """Return ``(lower, upper, step)`` bounds for a histogram bin-width slider.

    The step is picked from the spread of the data
    (``max_value - min_value``): 0.1 below a spread of 1, 0.5 below 10,
    otherwise 1.  The smallest selectable width is one step and the largest
    is a fifth of the spread (1.0 when the spread is zero).

    Args:
        min_value: Smallest value of the column being plotted.
        max_value: Largest value of the column being plotted.

    Returns:
        A tuple ``(lower, upper, step)``.  All three are ``int`` when the
        step is 1 and ``float`` otherwise, and ``lower < upper`` always holds.

    Raises:
        ValueError: If the spread is NaN, because the bounds cannot be
            converted to ``int``.
    """
    value_range = max_value - min_value

    if value_range < 1:
        step = 0.1
    elif value_range < 10:
        step = 0.5
    else:
        step = 1

    upper = value_range / 5 if value_range != 0 else 1.0

    # The lower bound is a bin *width*, so it must not depend on where the data
    # starts: deriving it from min_value pushed it above the upper bound for
    # columns that begin far from zero (e.g. 2700..6300).
    if step == 1:
        lower, upper = 1, int(upper)
    else:
        lower, upper = step, float(upper)

    # Keep the range non-degenerate so the slider always offers a choice.
    if upper <= lower:
        upper = lower + step

    return lower, upper, step
|
|
| |
# Univariate branch for the first radio option: summary statistics, a histogram
# with a user-chosen bin width, and a box plot for one column.
| if variable_type == "์์นํ": |
| |
| st.write("์์นํ ๋ฐ์ดํฐ๋ฅผ ํ์คํ ๊ทธ๋จ๊ณผ ์์๊ทธ๋ฆผ์ผ๋ก ํํํฉ๋๋ค. ") |
|
|
|
|
| try: |
# The column name is typed by the user; nothing is drawn until it is non-empty.
| colname = st.text_input("์๊ฐํํ๊ณ ์ถ์ ์ด ์ด๋ฆ์ ์จ์ฃผ์ธ์!") |
| if colname != "": |
| data = df[colname] |
| |
# One-row table holding mean, std, min, median and max computed with numpy.
| summary_stats = pd.DataFrame({ |
| 'ํ๊ท ': np.mean(data), |
| 'ํ์คํธ์ฐจ': np.std(data), |
| '์ต์๊ฐ': np.min(data), |
| '์ค์๊ฐ': np.median(data), |
| '์ต๋๊ฐ': np.max(data) |
| }, index=['ํต๊ณ๋']) |
|
|
| st.write(summary_stats) |
| minvalue = min(df[colname]) |
| maxvalue = max(df[colname]) |
| st.write(colname, '์ ์ต์๊ฐ:', minvalue, '์ ์ต๋๊ฐ:',maxvalue) |
# Slider bounds and step come from get_slider_step.
# NOTE(review): st.write(step) prints the raw step value; looks like leftover
# debug output, confirm before removing.
| bins_size_min, bins_size_max, step = get_slider_step(minvalue, maxvalue) |
| st.write(step) |
| bins_size = st.slider("๊ณ๊ธ์ ํฌ๊ธฐ๋ฅผ ์ค์ ํด์ฃผ์ธ์.", |
| min_value=bins_size_min, |
| max_value=bins_size_max, |
| step=step) |
|
|
| |
| fig = plt.figure(figsize=(5, 3)) |
|
|
| |
| |
| |
| |
|
|
|
|
|
|
| st.write("ํ์คํ ๊ทธ๋จ์ ๊ณ๊ธ์ ํฌ๊ธฐ:",bins_size) |
| |
# Histogram over the column's full min..max range using the chosen bin width.
| sns.set_style("darkgrid") |
| plt.title('Histogram of {}'.format(colname)) |
| sns.histplot(x=df[colname], binwidth=bins_size, binrange = [min(df[colname]), max(df[colname])], kde=False) |
| plt.xlabel("") |
| st.pyplot(fig) |
|
|
| |
| hide_outliers = st.checkbox("์ด์์น ์จ๊ธฐ๊ธฐ") |
|
|
| |
# The checkbox is inverted into seaborn's `showfliers` flag for the box plot.
| showfliers = not hide_outliers |
| fig2 = plt.figure(figsize=(5, 1)) |
| plt.title('Boxplot of {}'.format(colname)) |
| sns.set_style("darkgrid") |
| |
| sns.boxplot(x=df[colname], palette="Set2", showfliers=showfliers) |
| plt.xlabel("") |
| st.pyplot(fig2) |
|
|
# NOTE(review): an unknown column name in `df[colname]` presumably raises
# KeyError, which this ValueError handler would not catch; confirm.
| except ValueError: |
| st.write("์ฌ๋ฐ๋ฅธ ์ด ์ด๋ฆ์ ์จ์ฃผ์ธ์!") |
| st.stop() |
|
|
| |
# Univariate branch for the second radio option: a count plot of one column
# with the count written on each bar.
| elif variable_type =='๋ฒ์ฃผํ': |
| |
| st.write("๋ฒ์ฃผํ ๋ฐ์ดํฐ๋ฅผ ๋ง๋๊ทธ๋ํ๋ก ํํํฉ๋๋ค.") |
|
|
| try: |
| colname = st.text_input("์๊ฐํํ๊ณ ์ถ์ ์ด ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์!") |
| if colname != "": |
| |
| fig = plt.figure(figsize=(5,3)) |
| ax = sns.countplot(x=df[colname], palette="Blues") |
|
|
| |
# Label every bar with its height (thousands-separated), placed 10 points
# below the bar top and outlined in white.
| for p in ax.patches: |
| height = p.get_height() |
| ax.annotate(format(height, ','), |
| (p.get_x() + p.get_width() / 2, height), |
| ha='center', va='center', |
| xytext=(0, -10), textcoords='offset points', |
| fontsize=10, color='black', |
| path_effects=[path_effects.Stroke(linewidth=3, foreground='white'), |
| path_effects.Normal()]) |
|
|
# NOTE(review): sns.set_style is called after the plot has been drawn; it may
# not affect this figure; confirm the intended order.
| plt.title('Barplot of {}'.format(colname)) |
| sns.set_style("darkgrid") |
| plt.xlabel("") |
| st.pyplot(fig) |
|
|
# NOTE(review): an unknown column name in `df[colname]` presumably raises
# KeyError, which this ValueError handler would not catch; confirm.
| except ValueError: |
| st.write("์ฌ๋ฐ๋ฅธ ์ด ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์!") |
| st.stop() |
|
|
|
|
# Grouped analysis: a second radio selects the branch; the first option shows
# per-group statistics, a hue-split histogram and a per-group box plot.
| variable_type_group = st.radio("๊ทธ๋ฃน๋ณ ๋ฐ์ดํฐ๋ฅผ ์ ํํด์ฃผ์ธ์. (์: ํด๋์ค์ ๋ฐ๋ฅธ ์์กด์จ)", ("์์นํ", "๋ฒ์ฃผํ")) |
|
|
|
|
| if variable_type_group == "์์นํ": |
| |
| st.write("๊ทธ๋ฃน๋ณ ์์นํ ๋ฐ์ดํฐ๋ฅผ ํ์คํ ๊ทธ๋จ๊ณผ ์์๊ทธ๋ฆผ์ผ๋ก ํํํฉ๋๋ค. ") |
|
|
|
|
| try: |
# Two typed column names: the grouping column and the value column.
# NOTE(review): only `colname_2` is checked for being non-empty, so
# `colname_group` may still be an empty string when it is used below.
| colname_group = st.text_input("๊ทธ๋ฃน ์ด ์ด๋ฆ์ ์จ์ฃผ์ธ์!") |
| colname_2 = st.text_input("๊ทธ๋ฃน๋ณ๋ก ์๊ฐํํ๊ณ ์ถ์ ์์นํ ์ด ์ด๋ฆ์ ์จ์ฃผ์ธ์!") |
| if colname_2 != "": |
|
|
| data = df[[colname_2, colname_group]] |
| |
# Per-group mean, std, min, median and max; the columns are then relabelled.
| stat = data.groupby(colname_group)[colname_2].agg( |
| mean='mean', |
| std='std', |
| min='min', |
| median='median', |
| max='max' |
| ) |
| stat.columns = ['ํ๊ท ', 'ํ์คํธ์ฐจ', '์ต์๊ฐ', '์ค์๊ฐ', '์ต๋๊ฐ'] |
| st.write(stat) |
|
|
| minvalue = min(df[colname_2]) |
| maxvalue = max(df[colname_2]) |
| st.write(colname_2, '์ ์ต์๊ฐ:', minvalue, '์ ์ต๋๊ฐ:',maxvalue) |
# Slider bounds and step come from get_slider_step.
# NOTE(review): st.write(step) prints the raw step value; looks like leftover
# debug output, confirm before removing.
| bins_size_min, bins_size_max, step = get_slider_step(minvalue, maxvalue) |
| st.write(step) |
| bins_size = st.slider("๊ณ๊ธ์ ํฌ๊ธฐ๋ฅผ ์ค์ ํด์ฃผ์ธ์. (group)", |
| min_value=bins_size_min, |
| max_value=bins_size_max, |
| step=step) |
|
|
| |
| fig = plt.figure(figsize=(5, 3)) |
|
|
| st.write("ํ์คํ ๊ทธ๋จ์ ๊ณ๊ธ์ ํฌ๊ธฐ:",bins_size) |
| |
# Histogram of the value column, coloured by the grouping column.
| sns.set_style("darkgrid") |
| plt.title('Histogram of {}'.format(colname_2)) |
| sns.histplot(data = data, x = colname_2, hue = colname_group, binwidth=bins_size, binrange = [min(df[colname_2]), max(df[colname_2])], kde=False) |
| plt.xlabel("") |
| st.pyplot(fig) |
|
|
| |
# NOTE(review): this label matches the univariate checkbox except for a trailing
# space, presumably to keep the two widgets distinct; confirm.
| hide_outliers = st.checkbox("์ด์์น ์จ๊ธฐ๊ธฐ ") |
|
|
| |
| showfliers = not hide_outliers |
| fig2 = plt.figure(figsize=(5, 5)) |
| plt.title('Boxplot of {}'.format(colname_2)) |
| sns.set_style("darkgrid") |
| |
# One box per group; this call reads from `df` rather than the two-column `data`.
| sns.boxplot(data = df, x = colname_group, y = colname_2, palette="Set2", showfliers=showfliers) |
| plt.xlabel("") |
| st.pyplot(fig2) |
|
|
# NOTE(review): unknown column names presumably raise KeyError, which this
# ValueError handler would not catch; confirm.
| except ValueError: |
| st.write("์ฌ๋ฐ๋ฅธ ์ด ์ด๋ฆ์ ์จ์ฃผ์ธ์!") |
| st.stop() |
|
|
| |
# Grouped branch for the second radio option: a count plot with the count
# written on each bar.
| elif variable_type_group=='๋ฒ์ฃผํ': |
| |
| st.write("๋ฒ์ฃผํ ๋ฐ์ดํฐ๋ฅผ ๋ง๋๊ทธ๋ํ๋ก ํํํฉ๋๋ค.") |
|
|
| try: |
# NOTE(review): `colname_group` is read here but the plot below uses only
# `colname_2`, so the chart is not split by group; confirm whether
# a `hue` on the grouping column was intended.
| colname_group = st.text_input("๊ทธ๋ฃน ์ด ์ด๋ฆ์ ์จ์ฃผ์ธ์!") |
| colname_2 = st.text_input("๊ทธ๋ฃน๋ณ๋ก ์๊ฐํํ๊ณ ์ถ์ ์์นํ ์ด ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์!") |
| if colname_2 != "": |
| |
| fig = plt.figure(figsize=(5,3)) |
| ax = sns.countplot(x=df[colname_2], palette="Blues") |
|
|
| |
# Label every bar with its height (thousands-separated), placed 10 points
# below the bar top and outlined in white.
| for p in ax.patches: |
| height = p.get_height() |
| ax.annotate(format(height, ','), |
| (p.get_x() + p.get_width() / 2, height), |
| ha='center', va='center', |
| xytext=(0, -10), textcoords='offset points', |
| fontsize=10, color='black', |
| path_effects=[path_effects.Stroke(linewidth=3, foreground='white'), |
| path_effects.Normal()]) |
|
|
| plt.title('Barplot of {}'.format(colname_2)) |
| sns.set_style("darkgrid") |
| plt.xlabel("") |
| st.pyplot(fig) |
|
|
# NOTE(review): an unknown column name in `df[colname_2]` presumably raises
# KeyError, which this ValueError handler would not catch; confirm.
| except ValueError: |
| st.write("์ฌ๋ฐ๋ฅธ ์ด ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์!") |
| st.stop() |
|
|