trohith89 committed on
Commit
6ee1f18
·
verified ·
1 Parent(s): 8984d08

Update pages/2_Data_CLeaning_and_Preprocessing.py

Browse files
pages/2_Data_CLeaning_and_Preprocessing.py CHANGED
@@ -10,7 +10,6 @@ st.markdown("""
10
  ### Perform EDA and Clean Data
11
  Upload a CSV file to begin. This app will provide basic insights into the dataset,
12
  highlight missing values, and visualize numeric and categorical columns.
13
-
14
  ---
15
  """)
16
 
@@ -89,6 +88,26 @@ if uploaded_file is not None:
89
  sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
90
  st.pyplot(fig)
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  # Data Cleaning Section
93
  st.write("### Cleaned Dataset")
94
  cleaned_data = data.drop_duplicates()
@@ -102,25 +121,7 @@ if uploaded_file is not None:
102
  file_name="cleaned_dataset.csv",
103
  mime="text/csv"
104
  )
105
- # Check the columns before renaming
106
- st.write("### Dataset Columns:")
107
- st.write(data.columns)
108
-
109
- # Renaming columns if they exist
110
- if 'ProductCategory' in data.columns and 'ProductBrand' in data.columns and 'ProductPrice' in data.columns:
111
- data = data.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
112
- st.success("Columns renamed successfully!")
113
- else:
114
- st.warning("Columns 'ProductCategory', 'ProductBrand', or 'ProductPrice' not found in the dataset.")
115
-
116
- # Now check if 'Category' exists and plot
117
- if 'Category' in data.columns:
118
- st.write("### Bar Plot for Category")
119
- fig, ax = plt.subplots()
120
- sns.countplot(x='Category', data=data, palette='viridis', ax=ax)
121
- st.pyplot(fig)
122
- else:
123
- st.warning("'Category' column not found for plotting.")
124
  except pd.errors.EmptyDataError:
125
  st.error("The uploaded CSV file is empty. Please upload a valid file.")
126
  except pd.errors.ParserError:
 
10
  ### Perform EDA and Clean Data
11
  Upload a CSV file to begin. This app will provide basic insights into the dataset,
12
  highlight missing values, and visualize numeric and categorical columns.
 
13
  ---
14
  """)
15
 
 
88
  sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
89
  st.pyplot(fig)
90
 
91
+ # Check the columns before renaming
92
+ st.write("### Dataset Columns:")
93
+ st.write(data.columns)
94
+
95
+ # Renaming columns if they exist
96
+ if 'ProductCategory' in data.columns and 'ProductBrand' in data.columns and 'ProductPrice' in data.columns:
97
+ data = data.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
98
+ st.success("Columns renamed successfully!")
99
+ else:
100
+ st.warning("Columns 'ProductCategory', 'ProductBrand', or 'ProductPrice' not found in the dataset.")
101
+
102
+ # Now check if 'Category' exists and plot
103
+ if 'Category' in data.columns:
104
+ st.write("### Bar Plot for Category")
105
+ fig, ax = plt.subplots()
106
+ sns.countplot(x='Category', data=data, palette='viridis', ax=ax)
107
+ st.pyplot(fig)
108
+ else:
109
+ st.warning("'Category' column not found for plotting.")
110
+
111
  # Data Cleaning Section
112
  st.write("### Cleaned Dataset")
113
  cleaned_data = data.drop_duplicates()
 
121
  file_name="cleaned_dataset.csv",
122
  mime="text/csv"
123
  )
124
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  except pd.errors.EmptyDataError:
126
  st.error("The uploaded CSV file is empty. Please upload a valid file.")
127
  except pd.errors.ParserError: