# Spaces: Runtime error
# File size: 3,897 Bytes
# (Per-line commit hashes and the line-number gutter from the page view were extraction residue.)
import streamlit as st
import pandas as pd
import io
import base64
from sklearn.impute import SimpleImputer
# CSS injected once per run to hide Streamlit's default menu and footer.
HIDE_STREAMLIT_STYLE = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""

# Rows whose absolute z-score exceeds this value are reported as outliers.
Z_SCORE_THRESHOLD = 3


def imputable_columns(df, dtypes):
    """Return the names of columns of the given dtypes that can be imputed.

    Columns that contain no observed value at all are skipped: there is
    nothing to compute a mean / most frequent value from, and SimpleImputer
    would drop them, breaking the assignment back into the frame.

    Args:
        df: The DataFrame to inspect.
        dtypes: Dtype selector(s) accepted by ``DataFrame.select_dtypes``.

    Returns:
        A list of column labels (possibly empty).
    """
    candidates = df.select_dtypes(include=dtypes).columns
    return [name for name in candidates if df[name].notna().any()]


def impute_columns(df, columns, strategy):
    """Return a copy of ``df`` with missing values in ``columns`` filled in.

    Args:
        df: The DataFrame to clean; it is not modified.
        columns: List of column labels to impute. An empty list is a no-op.
        strategy: A ``SimpleImputer`` strategy such as ``"mean"`` or
            ``"most_frequent"``.

    Returns:
        ``df`` itself when there is nothing to impute, otherwise a new frame.
    """
    if not columns:
        # SimpleImputer raises on a zero-column selection.
        return df
    result = df.copy()
    imputer = SimpleImputer(strategy=strategy)
    result[columns] = imputer.fit_transform(result[columns])
    return result


def zscore_outliers(df, column, threshold=Z_SCORE_THRESHOLD):
    """Return the rows of ``df`` whose ``column`` value is a z-score outlier.

    Args:
        df: The DataFrame to inspect.
        column: Label of the column to score.
        threshold: Absolute z-score above which a row counts as an outlier.

    Returns:
        A DataFrame with the outlier rows, or ``None`` when the column is not
        numeric and a z-score cannot be computed. A constant column yields an
        empty frame (its z-scores are NaN and never exceed the threshold).
    """
    values = df[column]
    if not pd.api.types.is_numeric_dtype(values):
        return None
    # Series.abs() avoids depending on numpy, which this file never imports.
    z_scores = ((values - values.mean()) / values.std()).abs()
    return df[z_scores > threshold]


def main():
    """Render the CSV cleaning app.

    Streamlit re-executes the script on every widget interaction, so all state
    (the uploaded frames and every cleaning step) is rebuilt on each run.
    """
    st.set_page_config(page_title="CSV Data Cleaning Tool")
    st.markdown(HIDE_STREAMLIT_STYLE, unsafe_allow_html=True)
    st.title("CSV Data Tool")
    st.markdown("กดเลือกหัว Tool ข้อที่ต้องการจะใช้ได้เลยนะจ๊ะ")

    uploaded_files = st.file_uploader(
        "Choose CSV files", type="csv", accept_multiple_files=True
    )
    if not uploaded_files:
        # Everything below needs at least one DataFrame.
        st.info("Upload one or more CSV files to begin.")
        return

    dataframes = []
    for uploaded in uploaded_files:
        uploaded.seek(0)  # the buffer may have been consumed by a previous run
        try:
            dataframes.append(pd.read_csv(uploaded))
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not read {uploaded.name}: {exc}")
    if not dataframes:
        return

    st.markdown("Data Cleansing")
    # drop_duplicates() removes duplicated rows, so the label says "rows".
    if st.checkbox("Remove duplicate rows", value=False):
        dataframes = [df.drop_duplicates() for df in dataframes]
    if st.checkbox("Remove empty rows", value=False):
        dataframes = [df.dropna(how="all") for df in dataframes]
    if st.checkbox(
        "Impute missing values with mean (for int and float columns)", value=False
    ):
        dataframes = [
            impute_columns(df, imputable_columns(df, ["number"]), "mean")
            for df in dataframes
        ]
    if st.checkbox(
        "Impute missing values with most frequent category (for categorical columns)",
        value=False,
    ):
        dataframes = [
            impute_columns(df, imputable_columns(df, ["object"]), "most_frequent")
            for df in dataframes
        ]

    # One outlier picker per uploaded file; unique keys keep Streamlit from
    # raising a duplicate-widget error when several files are loaded.
    for i, df in enumerate(dataframes):
        selected_out = st.selectbox(
            "เลือก columns ที่จะดู Outlier", df.columns, key=f"outlier_column_{i}"
        )
        if selected_out is None:
            continue
        st.write(f"คอลัมน์ {selected_out}:")
        outliers = zscore_outliers(df, selected_out)
        if outliers is None:
            st.warning(f"Column {selected_out} is not numeric; cannot compute z-scores.")
            continue
        st.write("Outliers:")
        st.write(outliers)

    st.markdown("Data transform")
    for i, df in enumerate(dataframes):
        st.dataframe(df)
        selected_values = st.multiselect(
            "เลือกค่าจากคอลัมน์", df.columns, key=f"transform_columns_{i}"
        )
        convert_to_string = st.checkbox(
            "convert columns to String", value=False, key=f"to_string_{i}"
        )
        convert_to_float = st.checkbox(
            "convert columns to Float", value=False, key=f"to_float_{i}"
        )
        if not selected_values:
            continue
        converted = df.copy()
        if convert_to_string:
            converted[selected_values] = converted[selected_values].astype(str)
        if convert_to_float:
            try:
                converted[selected_values] = converted[selected_values].astype(float)
            except (ValueError, TypeError) as exc:
                # Non-numeric text cannot be parsed; report it instead of crashing.
                st.error(f"Could not convert the selected columns to float: {exc}")
        dataframes[i] = converted

    if st.checkbox("Show DataFrames", value=True):
        for i, df in enumerate(dataframes):
            st.write(f"DataFrame {i + 1}")
            st.dataframe(df)

    if st.button("Download cleaned data"):
        for i, df in enumerate(dataframes):
            csv_text = df.to_csv(index=False)
            b64 = base64.b64encode(csv_text.encode()).decode()
            href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
            st.markdown(href, unsafe_allow_html=True)

    st.markdown("")
    st.markdown("---")
    st.markdown("")


# `streamlit run` executes the script as the `__main__` module, so the app
# still renders; importing this file (e.g. for tests) has no side effects.
if __name__ == "__main__":
    main()