trohith89 committed on
Commit
bb9c90b
·
verified ·
1 Parent(s): 81da568

Update pages/2_Data_CLeaning_and_Preprocessing.py

Browse files
pages/2_Data_CLeaning_and_Preprocessing.py CHANGED
@@ -3,9 +3,15 @@ import pandas as pd
3
  import os
4
  from io import StringIO
5
  import sys
 
 
 
 
 
 
6
 
7
  # Page Title
8
- st.markdown("<h1 style='text-align:center; color:#008080;'>Data Cleaning and Processing</h1>", unsafe_allow_html=True)
9
 
10
  # Access dataset from session state
11
  df = st.session_state.get("dataset")
@@ -27,13 +33,70 @@ if df is not None:
27
 
28
  st.subheader("Shape of the Dataset:")
29
  st.write(df.shape)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  else:
31
  st.warning("No dataset found. Please upload a dataset on the Home page.")
32
 
33
 
34
  # Define the URL of the background image (use your own image URL)
35
  # Apply custom CSS for the background image and overlay
36
- background_image_url = "https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/FlisTFfpU7flDCWj_KttH.jpeg"
37
 
38
  st.markdown(
39
  f"""
 
3
  import os
4
  from io import StringIO
5
  import sys
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import plotly.graph_objects as go
9
+ import plotly.express as px
10
+ from plotly.subplots import make_subplots
11
+ from io import StringIO
12
 
13
  # Page Title
14
# Centered page heading. The colour must be a valid CSS value: the previous
# "wh" is not one, so browsers dropped the declaration and fell back to the
# theme's default heading colour.
st.markdown("<h1 style='text-align:center; color:white;'>Data Cleaning and Processing</h1>", unsafe_allow_html=True)
15
 
16
  # Access dataset from session state
17
  df = st.session_state.get("dataset")
 
33
 
34
  st.subheader("Shape of the Dataset:")
35
  st.write(df.shape)
36
+
37
# Exploratory charts plus a de-duplicated CSV export for the loaded dataset.
# NOTE(review): the diff hunk header places this section inside the
# `if df is not None:` branch -- re-indent it to that level when splicing it
# back into the page script.
# The dataset is bound to `df` (read from st.session_state above); the earlier
# revision referenced `data`, which is not defined in the visible script.

# Visualize Numeric Data
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
if len(numeric_columns) > 0:
    # Plotly expects plain string titles, so convert the pandas Index once.
    numeric_titles = [str(name) for name in numeric_columns]
    stacked_height = 500 * len(numeric_columns)

    st.subheader("Histograms for Numeric Columns:")
    fig = make_subplots(rows=len(numeric_columns), cols=1, subplot_titles=numeric_titles)
    for row, col in enumerate(numeric_columns, start=1):
        # Only the trace is reused; per-figure layout (titles) is discarded,
        # so none is requested here.
        hist = px.histogram(df, x=col, nbins=30)
        fig.add_trace(hist.data[0], row=row, col=1)
    fig.update_layout(height=stacked_height, title_text="Histograms for Numeric Columns")
    st.plotly_chart(fig)

    st.subheader("Boxplots for Numeric Columns:")
    fig = make_subplots(rows=len(numeric_columns), cols=1, subplot_titles=numeric_titles)
    for row, col in enumerate(numeric_columns, start=1):
        boxplot = px.box(df, y=col)
        fig.add_trace(boxplot.data[0], row=row, col=1)
    fig.update_layout(height=stacked_height, title_text="Boxplots for Numeric Columns")
    st.plotly_chart(fig)
else:
    st.warning("No numeric columns available for visualization.")

# Visualize Categorical Data
categorical_columns = df.select_dtypes(include=['object', 'category']).columns
if len(categorical_columns) > 0:
    st.subheader("Bar Plots for Categorical Columns:")
    selected_cat_col = st.selectbox("Select a Categorical Column", categorical_columns)

    st.write(f"Value Counts for '{selected_cat_col}':")
    st.write(df[selected_cat_col].value_counts())

    fig = px.bar(df, x=selected_cat_col, title=f'Bar Plot of {selected_cat_col}', color=selected_cat_col)
    st.plotly_chart(fig)
else:
    st.warning("No categorical columns available for visualization.")

# Correlation Matrix for Numeric Columns (needs at least two columns)
if len(numeric_columns) > 1:
    st.subheader("Correlation Matrix:")
    corr_matrix = df[numeric_columns].corr()
    # 'coolwarm' is a matplotlib colormap name that Plotly does not recognise;
    # 'RdBu_r' is Plotly's built-in blue-to-red diverging scale. Pinning the
    # range to [-1, 1] keeps zero correlation at the neutral midpoint.
    fig = px.imshow(
        corr_matrix,
        title="Correlation Matrix",
        color_continuous_scale="RdBu_r",
        zmin=-1,
        zmax=1,
    )
    st.plotly_chart(fig)

# "Cleaning" here is limited to dropping exact duplicate rows.
st.subheader("Cleaned Dataset:")
cleaned_data = df.drop_duplicates()
st.write(cleaned_data)

# Offer the de-duplicated frame as a UTF-8 encoded CSV download.
cleaned_csv = cleaned_data.to_csv(index=False).encode('utf-8')
st.download_button(
    label="Download Cleaned Dataset",
    data=cleaned_csv,
    file_name="cleaned_dataset.csv",
    mime="text/csv",
)
92
+
93
  else:
94
  st.warning("No dataset found. Please upload a dataset on the Home page.")
95
 
96
 
97
  # Define the URL of the background image (use your own image URL)
98
  # Apply custom CSS for the background image and overlay
99
+ background_image_url = "https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/6EvD_NR-zVMVJI5okpx8c.jpeg"
100
 
101
  st.markdown(
102
  f"""