1MR committed on
Commit
587e047
·
verified ·
1 Parent(s): a7fd28b

Update Preprocessing1.py

Browse files
Files changed (1) hide show
  1. Preprocessing1.py +128 -145
Preprocessing1.py CHANGED
@@ -1,145 +1,128 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import io
5
- import matplotlib.pyplot as plt
6
- from sklearn.preprocessing import LabelEncoder
7
- import seaborn as sns
8
- import base64
9
-
10
-
11
def preview_data():
    """Render preview controls for the dataset held in session state.

    Reads ``st.session_state["data"]`` and, depending on the selected radio
    option, shows the result of ``head()``, ``tail()`` or ``head(n)`` for a
    user-chosen ``n``. Three checkboxes additionally show the whole object,
    its ``columns`` and its ``shape``. When no ``"data"`` entry exists, only
    a warning is rendered.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset to view options.")
        return

    data = st.session_state["data"]

    st.write("### Dataset Preview Options:")

    preview_option = st.radio(
        "Select how to preview the dataset:",
        options=["Head", "Tail", "Custom Number of Rows"],
        index=0,
    )

    if preview_option == "Head":
        st.write("### First 5 Rows of the Dataset:")
        st.dataframe(data.head())
    elif preview_option == "Tail":
        st.write("### Last 5 Rows of the Dataset:")
        st.dataframe(data.tail())
    elif preview_option == "Custom Number of Rows":
        total_rows = len(data)
        if total_rows > 1:
            number = st.slider(
                "Select Number of Rows to Display:", 1, total_rows)
        else:
            # With fewer than two rows there is no range to choose from, and
            # for an empty dataset the slider's minimum (1) would exceed its
            # maximum (0); skip the widget and show what is there.
            number = total_rows
        st.write(f"### First {number} Rows of the Dataset:")
        st.dataframe(data.head(number))

    # Show entire data
    if st.checkbox("Show all data"):
        st.write(data)

    # Show column names
    if st.checkbox("Show Column Names"):
        st.write(data.columns)

    # Show dataset dimensions (rows and columns)
    if st.checkbox("Show Dimensions"):
        st.write(data.shape)
49
-
50
-
51
def data_cleaning():
    """Render basic data-cleaning tools for the dataset in session state.

    Offers two actions through a select box:

    * re-run ``pd.to_numeric`` over the columns that already have a numeric
      dtype, and
    * list the unique values of every text-like or categorical column.

    When ``st.session_state`` has no ``"data"`` entry, only a warning is
    rendered.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset to perform data cleaning.")
        return

    data = st.session_state["data"]

    st.subheader("Data Cleaning")

    col_option = st.selectbox("Choose your option", [
        "Check all numeric features are numeric?",
        "Show unique values of categorical features",
    ])

    # Check and convert numeric columns
    if col_option == "Check all numeric features are numeric?":
        st.write("Converting all numeric columns to numeric types...")
        # NOTE(review): only columns whose dtype is already numeric are
        # selected here; numbers stored as text are not picked up by this
        # pass. Confirm whether those were meant to be converted as well.
        for col in data.select_dtypes(include=np.number).columns:
            data[col] = pd.to_numeric(data[col], errors='coerce')

        st.success("Done!")

    # Show unique values for categorical features
    elif col_option == "Show unique values of categorical features":
        st.write("Unique values for categorical features:")
        for column in data.columns:
            series = data[column]
            # Besides plain ``object`` columns, text can be held in a pandas
            # string dtype and categories in a ``category`` dtype; a bare
            # ``dtype == object`` test would skip both.
            is_categorical = (
                pd.api.types.is_object_dtype(series)
                or pd.api.types.is_string_dtype(series)
                or isinstance(series.dtype, pd.CategoricalDtype)
            )
            if is_categorical:
                st.write(f"{column}: {series.unique()}")
                st.write("====================================")
81
-
82
-
83
def modify_column_names():
    """Render controls for renaming the columns of the session dataset.

    Works on the DataFrame stored under ``st.session_state["data"]`` and
    keeps the working list of names in ``st.session_state.modified_columns``.
    Four buttons apply a bulk transform (upper case, lower case, spaces to
    underscores, title case) to every string column name; a select box plus
    a text input rename a single column. Accepted renames are assigned to
    ``df.columns`` in place. A rename that would introduce duplicate column
    names is rejected with an error. When no dataset is stored, only a
    warning is rendered.
    """
    st.title("Modify Column Names")

    # Ensure data exists in the session
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    df = st.session_state["data"]

    # Every rename below is written straight to ``df.columns``, so the cached
    # list should always mirror the DataFrame. Re-seed it whenever the two
    # disagree (first visit, or the stored dataset changed) instead of later
    # assigning a stale list to a different frame.
    if (
        "modified_columns" not in st.session_state
        or list(st.session_state.modified_columns) != list(df.columns)
    ):
        st.session_state.modified_columns = list(df.columns)

    st.write('### *Current Column Names*')
    st.table(df.columns)

    st.write('### *Modify Column Names*')
    with st.expander("Modify Column Names", expanded=True):
        before_col = st.session_state.modified_columns
        st.table(pd.DataFrame(before_col, columns=['Column Name']))

        # (button label, transform) pairs; each button is placed in its own
        # layout column.
        bulk_actions = [
            ('Convert to Uppercase', str.upper),
            ('Convert to Lowercase', str.lower),
            ('Replace Spaces with Underscore',
             lambda name: name.replace(" ", "_")),
            ('Capitalize First Letters', str.title),
        ]
        renamed = None
        slots = st.columns(len(bulk_actions))
        for slot, (label, transform) in zip(slots, bulk_actions):
            if slot.button(label):
                # Non-string labels (e.g. integer headers) have no text to
                # transform; leave them untouched rather than crash.
                renamed = [
                    transform(col) if isinstance(col, str) else col
                    for col in before_col
                ]

        # Apply the changes only if a button was pressed
        if renamed is not None:
            if len(set(renamed)) < len(set(before_col)):
                # e.g. "Age" and "age" both becoming "AGE"
                st.error("This change would create duplicate column names.")
            else:
                st.session_state.modified_columns = renamed
                df.columns = renamed
                st.success("Changes applied successfully.")
                st.table(pd.DataFrame(
                    df.columns, columns=['Modified Columns']))

        st.write("### *Modify a Specific Column Name*")
        current = st.session_state.modified_columns
        column_select = st.selectbox(
            'Select column to modify', options=current)
        new_column_name = st.text_input('Enter new column name')
        if st.button('Update Column Name'):
            # ``is not None`` rather than truthiness so that a column
            # labelled 0 or "" can still be renamed.
            if column_select is not None and new_column_name:
                if (new_column_name != column_select
                        and new_column_name in current):
                    st.error(
                        f"A column named '{new_column_name}' already exists.")
                else:
                    updated = [
                        new_column_name if col == column_select else col
                        for col in current
                    ]
                    st.session_state.modified_columns = updated
                    df.columns = updated
                    st.success("Column name updated.")
                    st.table(pd.DataFrame(
                        df.columns, columns=['Modified Columns']))
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import io
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.preprocessing import LabelEncoder
7
+ import seaborn as sns
8
+ import base64
9
+
10
+
11
def preview_data():
    """Render preview controls for the dataset held in session state.

    Reads ``st.session_state["data"]`` and, depending on the selected radio
    option, shows the result of ``head()``, ``tail()`` or ``head(n)`` for a
    user-chosen ``n``. Three checkboxes additionally show the whole object,
    its ``columns`` and its ``shape``. When no ``"data"`` entry exists, only
    a warning is rendered.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset to view options.")
        return

    data = st.session_state["data"]

    st.write("### Dataset Preview Options:")

    preview_option = st.radio(
        "Select how to preview the dataset:",
        options=["Head", "Tail", "Custom Number of Rows"],
        index=0,
    )

    if preview_option == "Head":
        st.write("### First 5 Rows of the Dataset:")
        st.dataframe(data.head())
    elif preview_option == "Tail":
        st.write("### Last 5 Rows of the Dataset:")
        st.dataframe(data.tail())
    elif preview_option == "Custom Number of Rows":
        total_rows = len(data)
        if total_rows > 1:
            number = st.slider("Select Number of Rows to Display:", 1, total_rows)
        else:
            # With fewer than two rows there is no range to choose from, and
            # for an empty dataset the slider's minimum (1) would exceed its
            # maximum (0); skip the widget and show what is there.
            number = total_rows
        st.write(f"### First {number} Rows of the Dataset:")
        st.dataframe(data.head(number))

    if st.checkbox("Show all data"):
        st.write(data)

    if st.checkbox("Show Column Names"):
        st.write(data.columns)

    if st.checkbox("Show Dimensions"):
        st.write(data.shape)
45
+
46
+
47
+
48
def data_cleaning():
    """Render basic data-cleaning tools for the dataset in session state.

    Offers two actions through a select box:

    * re-run ``pd.to_numeric`` over the columns that already have a numeric
      dtype and store the frame back under ``st.session_state["data"]``, and
    * list the unique values of every text-like or categorical column.

    When ``st.session_state`` has no ``"data"`` entry, only a warning is
    rendered.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset to perform data cleaning.")
        return

    data = st.session_state["data"]

    st.subheader("Data Cleaning")

    col_option = st.selectbox("Choose your option", [
        "Check all numeric features are numeric?",
        "Show unique values of categorical features",
    ])

    if col_option == "Check all numeric features are numeric?":
        st.write("Converting all numeric columns to numeric types...")
        # NOTE(review): only columns whose dtype is already numeric are
        # selected here; numbers stored as text are not picked up by this
        # pass. Confirm whether those were meant to be converted as well.
        for col in data.select_dtypes(include=np.number).columns:
            data[col] = pd.to_numeric(data[col], errors='coerce')

        st.session_state["data"] = data
        st.success("Done!")

    elif col_option == "Show unique values of categorical features":
        st.write("Unique values for categorical features:")
        for column in data.columns:
            series = data[column]
            # Besides plain ``object`` columns, text can be held in a pandas
            # string dtype and categories in a ``category`` dtype; a bare
            # ``dtype == object`` test would skip both.
            is_categorical = (
                pd.api.types.is_object_dtype(series)
                or pd.api.types.is_string_dtype(series)
                or isinstance(series.dtype, pd.CategoricalDtype)
            )
            if is_categorical:
                st.write(f"{column}: {series.unique()}")
74
+
75
+
76
def modify_column_names():
    """Render controls for renaming the columns of the session dataset.

    Works on the DataFrame stored under ``st.session_state["data"]`` and
    keeps the working list of names in ``st.session_state.modified_columns``.
    Four buttons apply a bulk transform (upper case, lower case, spaces to
    underscores, title case) to every string column name; a select box plus
    a text input rename a single column. Accepted renames are assigned to
    ``df.columns`` and the frame is stored back into session state. A rename
    that would introduce duplicate column names is rejected with an error.
    When no dataset is stored, only a warning is rendered.
    """
    st.title("Modify Column Names")

    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    df = st.session_state["data"]

    # Every rename below is written straight to ``df.columns``, so the cached
    # list should always mirror the DataFrame. Re-seed it whenever the two
    # disagree (first visit, or the stored dataset changed) instead of later
    # assigning a stale list to a different frame.
    if (
        "modified_columns" not in st.session_state
        or list(st.session_state.modified_columns) != list(df.columns)
    ):
        st.session_state.modified_columns = list(df.columns)

    st.write('### *Current Column Names*')
    st.table(df.columns)

    st.write('### *Modify Column Names*')
    with st.expander("Modify Column Names", expanded=True):
        before_col = st.session_state.modified_columns
        st.table(pd.DataFrame(before_col, columns=['Column Name']))

        # (button label, transform) pairs; each button is placed in its own
        # layout column.
        bulk_actions = [
            ('Convert to Uppercase', str.upper),
            ('Convert to Lowercase', str.lower),
            ('Replace Spaces with Underscore',
             lambda name: name.replace(" ", "_")),
            ('Capitalize First Letters', str.title),
        ]
        renamed = None
        slots = st.columns(len(bulk_actions))
        for slot, (label, transform) in zip(slots, bulk_actions):
            if slot.button(label):
                # Non-string labels (e.g. integer headers) have no text to
                # transform; leave them untouched rather than crash.
                renamed = [
                    transform(col) if isinstance(col, str) else col
                    for col in before_col
                ]

        if renamed is not None:
            if len(set(renamed)) < len(set(before_col)):
                # e.g. "Age" and "age" both becoming "AGE"
                st.error("This change would create duplicate column names.")
            else:
                st.session_state.modified_columns = renamed
                df.columns = renamed
                st.session_state["data"] = df
                st.success("Changes applied successfully.")
                st.table(pd.DataFrame(df.columns, columns=['Modified Columns']))

        st.write("### *Modify a Specific Column Name*")
        current = st.session_state.modified_columns
        column_select = st.selectbox('Select column to modify', options=current)
        new_column_name = st.text_input('Enter new column name')
        if st.button('Update Column Name'):
            # ``is not None`` rather than truthiness so that a column
            # labelled 0 or "" can still be renamed.
            if column_select is not None and new_column_name:
                if (new_column_name != column_select
                        and new_column_name in current):
                    st.error(
                        f"A column named '{new_column_name}' already exists.")
                else:
                    updated = [
                        new_column_name if col == column_select else col
                        for col in current
                    ]
                    st.session_state.modified_columns = updated
                    df.columns = updated
                    st.session_state["data"] = df
                    st.success("Column name updated.")
                    st.table(pd.DataFrame(df.columns, columns=['Modified Columns']))