OatNapat committed on
Commit
45f4df4
·
1 Parent(s): b972734

Upload app (1).py

Browse files
Files changed (1) hide show
  1. app (1).py +113 -0
app (1).py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app: upload one or more CSV files, apply optional cleaning steps,
# inspect z-score outliers, convert column types, and download the results.
import streamlit as st
import pandas as pd
import io
import base64
from sklearn.impute import SimpleImputer

# Absolute z-score above which a value is reported as an outlier.
Z_SCORE_THRESHOLD = 3


def _impute_columns(df, dtype_include, strategy):
    """Fill missing values in the columns of ``df`` matching ``dtype_include``.

    Columns that contain no observed value at all are left untouched:
    SimpleImputer cannot compute a statistic for them and would otherwise
    drop them, making the result narrower than the selection it is assigned
    back to. An empty selection (or a frame with no rows) is a no-op.

    Args:
        df: DataFrame to modify in place.
        dtype_include: dtype selector passed to ``DataFrame.select_dtypes``.
        strategy: SimpleImputer strategy, e.g. ``"mean"`` or
            ``"most_frequent"``.

    Returns:
        The same DataFrame, for convenient reassignment.
    """
    columns = [
        name
        for name in df.select_dtypes(include=dtype_include).columns
        if df[name].notna().any()
    ]
    if columns:
        df[columns] = SimpleImputer(strategy=strategy).fit_transform(df[columns])
    return df


def _find_outliers(df, column, threshold=Z_SCORE_THRESHOLD):
    """Return the rows of ``df`` whose ``column`` value has |z-score| > threshold.

    A constant column has a standard deviation of 0, which yields NaN/inf
    z-scores; NaN never compares greater than the threshold, so such rows are
    simply not reported.
    """
    values = df[column]
    z_scores = ((values - values.mean()) / values.std()).abs()
    return df[z_scores > threshold]


st.set_page_config(page_title="CSV Data Cleaning Tool")

hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

st.title("CSV Data Tool")

st.markdown("กดเลือกหัว Tool ข้อที่ต้องการจะใช้ได้เลยนะจ๊ะ")

uploaded_files = st.file_uploader(
    "Choose CSV files", type="csv", accept_multiple_files=True
)

dataframes = []

if uploaded_files:
    for file in uploaded_files:
        file.seek(0)
        try:
            dataframes.append(pd.read_csv(file))
        except (
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ) as exc:
            # One unreadable upload should not take down the whole page.
            st.error(f"Could not read {file.name}: {exc}")

# Everything below needs at least one successfully parsed file.
if dataframes:
    st.markdown("Data Cleansing")

    # DataFrame.drop_duplicates removes duplicate *rows*; the label now says so.
    remove_duplicate_rows = st.checkbox("Remove duplicate rows", value=False)
    if remove_duplicate_rows:
        dataframes = [frame.drop_duplicates() for frame in dataframes]

    remove_empty_rows = st.checkbox("Remove empty rows", value=False)
    if remove_empty_rows:
        dataframes = [frame.dropna(how="all") for frame in dataframes]

    impute_mean = st.checkbox(
        "Impute missing values with mean (for int and float columns)",
        value=False,
    )
    if impute_mean:
        # "number" covers every integer/float width, not only the platform
        # defaults that 'int' and 'float' resolve to.
        dataframes = [
            _impute_columns(frame, "number", "mean") for frame in dataframes
        ]

    impute_most_frequent = st.checkbox(
        "Impute missing values with most frequent category (for categorical columns)",
        value=False,
    )
    if impute_most_frequent:
        dataframes = [
            _impute_columns(frame, ["object"], "most_frequent")
            for frame in dataframes
        ]

    # The outlier view and the type conversions act on a single DataFrame.
    # With several uploads the user picks which one; the default is the last
    # file, which is the one these tools previously always operated on.
    if len(dataframes) > 1:
        active_index = st.selectbox(
            "Select DataFrame",
            list(range(len(dataframes))),
            index=len(dataframes) - 1,
            format_func=lambda position: f"DataFrame {position + 1}",
        )
    else:
        active_index = 0
    df = dataframes[active_index]

    # Only numeric columns have a meaningful z-score.
    numeric_columns = df.select_dtypes(include="number").columns
    selected_out = st.selectbox("เลือก columns ที่จะดู Outlier", numeric_columns)
    if selected_out is not None:
        st.write(f"คอลัมน์ {selected_out}:")
        outliers = _find_outliers(df, selected_out)
        st.write("Outliers:")
        st.write(outliers)

    st.markdown("Data transform")

    for frame in dataframes:
        st.dataframe(frame)

    selected_values = st.multiselect("เลือกค่าจากคอลัมน์", df.columns)

    convert_to_String = st.checkbox("convert columns to String", value=False)
    convert_to_float = st.checkbox("convert columns to Float", value=False)
    if selected_values:
        # df is the same object stored in `dataframes`, so these in-place
        # conversions are reflected in the preview and download below.
        if convert_to_String:
            df[selected_values] = df[selected_values].astype(str)
        if convert_to_float:
            try:
                df[selected_values] = df[selected_values].astype(float)
            except (ValueError, TypeError) as exc:
                st.error(f"Cannot convert the selected columns to float: {exc}")

    show_dataframes = st.checkbox("Show DataFrames", value=True)
    if show_dataframes:
        for i, frame in enumerate(dataframes):
            st.write(f"DataFrame {i + 1}")
            st.dataframe(frame)

    if st.button("Download cleaned data"):
        for i, frame in enumerate(dataframes):
            csv_text = frame.to_csv(index=False)
            b64 = base64.b64encode(csv_text.encode()).decode()
            # text/csv is the registered media type for CSV content.
            href = f'<a href="data:text/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
            st.markdown(href, unsafe_allow_html=True)


st.markdown("")
st.markdown("---")
st.markdown("")