File size: 4,899 Bytes
587e047 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import streamlit as st
import pandas as pd
import numpy as np
import io
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import base64
def preview_data() -> None:
    """Render interactive preview controls for the uploaded dataset.

    Reads the dataset stored under ``st.session_state["data"]`` (used here
    through ``head``/``tail``/``columns``/``shape``, i.e. as a pandas
    DataFrame) and lets the user view the head, the tail, or a chosen number
    of leading rows. Three checkboxes optionally show the full data, the
    column names, and the dimensions. When no dataset is stored, a warning is
    shown instead.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset to view options.")
        return

    data = st.session_state["data"]
    st.write("### Dataset Preview Options:")
    preview_option = st.radio(
        "Select how to preview the dataset:",
        options=["Head", "Tail", "Custom Number of Rows"],
        index=0,
    )

    if preview_option == "Head":
        st.write("### First 5 Rows of the Dataset:")
        st.dataframe(data.head())
    elif preview_option == "Tail":
        st.write("### Last 5 Rows of the Dataset:")
        st.dataframe(data.tail())
    elif preview_option == "Custom Number of Rows":
        total_rows = len(data)
        if total_rows > 1:
            number = st.slider("Select Number of Rows to Display:", 1, total_rows)
        else:
            # A slider needs min_value < max_value; with zero or one row
            # there is nothing to choose, so show whatever rows exist.
            number = total_rows
        st.write(f"### First {number} Rows of the Dataset:")
        st.dataframe(data.head(number))

    if st.checkbox("Show all data"):
        st.write(data)
    if st.checkbox("Show Column Names"):
        st.write(data.columns)
    if st.checkbox("Show Dimensions"):
        st.write(data.shape)
def _is_text_column(series) -> bool:
    """Return True when *series* has object dtype or a string dtype."""
    return pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series)


def data_cleaning() -> None:
    """Render the data-cleaning options for the uploaded dataset.

    Reads the DataFrame stored under ``st.session_state["data"]`` and offers
    two actions through a select box:

    * "Check all numeric features are numeric?" - every object/string column
      whose non-null values all parse as numbers is converted to a numeric
      dtype; the DataFrame is written back to the session state and the
      converted columns are listed.
    * "Show unique values of categorical features" - the unique values of
      every object/string column are printed.

    When no dataset is stored, a warning is shown instead.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset to perform data cleaning.")
        return

    data = st.session_state["data"]
    st.subheader("Data Cleaning")
    col_option = st.selectbox(
        "Choose your option",
        [
            "Check all numeric features are numeric?",
            "Show unique values of categorical features",
        ],
    )

    if col_option == "Check all numeric features are numeric?":
        st.write("Converting all numeric columns to numeric types...")
        converted = []
        for col in data.columns:
            series = data[col]
            # Columns that already have a numeric dtype need no work; only
            # text-like columns can be hiding numbers stored as strings.
            if not _is_text_column(series):
                continue
            # An all-null column gives no evidence that it is numeric.
            if not series.notna().any():
                continue
            try:
                # errors="raise" makes the conversion all-or-nothing, so
                # genuine text is never silently turned into NaN.
                numeric = pd.to_numeric(series, errors="raise")
            except (ValueError, TypeError):
                continue
            data[col] = numeric
            converted.append(col)
        st.session_state["data"] = data
        if converted:
            st.write(f"Converted to numeric: {', '.join(map(str, converted))}")
        st.success("Done!")
    elif col_option == "Show unique values of categorical features":
        st.write("Unique values for categorical features:")
        for column in data.columns:
            if _is_text_column(data[column]):
                st.write(f"{column}: {data[column].unique()}")
def _introduces_duplicates(old_names, new_names) -> bool:
    """Return True when *new_names* has fewer distinct labels than *old_names*."""
    return len(set(new_names)) < len(set(old_names))


def modify_column_names() -> None:
    """Render the column-renaming page for the uploaded dataset.

    Reads the DataFrame stored under ``st.session_state["data"]`` and offers:

    * four bulk transformations of all column names (uppercase, lowercase,
      spaces to underscores, title case);
    * renaming a single column chosen from a select box.

    Accepted changes are assigned to ``df.columns``, the DataFrame is written
    back to the session state, and ``st.session_state.modified_columns`` holds
    the current list of names. A change that would make two different columns
    share a name is rejected with a warning. When no dataset is stored, a
    warning is shown instead.
    """
    st.title("Modify Column Names")
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    df = st.session_state["data"]
    # Re-sync on every run rather than only once: a list cached from a
    # previously loaded dataset would no longer match this DataFrame.
    st.session_state.modified_columns = list(df.columns)

    st.write('### *Current Column Names*')
    st.table(df.columns)
    st.write('### *Modify Column Names*')
    with st.expander("Modify Column Names", expanded=True):
        before_col = st.session_state.modified_columns
        st.table(pd.DataFrame(before_col, columns=['Column Name']))

        bulk_transforms = (
            ('Convert to Uppercase', str.upper),
            ('Convert to Lowercase', str.lower),
            ('Replace Spaces with Underscore', lambda name: name.replace(" ", "_")),
            ('Capitalize First Letters', str.title),
        )
        button_slots = st.columns(len(bulk_transforms))

        candidate = None
        # Every button is rendered on each run; the pressed one (if any)
        # yields the candidate list of new names.
        for slot, (label, transform) in zip(button_slots, bulk_transforms):
            if slot.button(label):
                # str() lets non-string labels (e.g. integers) be transformed.
                candidate = [transform(str(col)) for col in before_col]

        if candidate is not None:
            if _introduces_duplicates(before_col, candidate):
                st.warning("This change would create duplicate column names; no changes applied.")
            else:
                st.session_state.modified_columns = candidate
                df.columns = candidate
                st.session_state["data"] = df
                st.success("Changes applied successfully.")
                st.table(pd.DataFrame(df.columns, columns=['Modified Columns']))

        st.write("### *Modify a Specific Column Name*")
        column_select = st.selectbox(
            'Select column to modify',
            options=st.session_state.modified_columns,
        )
        new_column_name = st.text_input('Enter new column name').strip()
        if st.button('Update Column Name'):
            current = st.session_state.modified_columns
            if column_select is None or not new_column_name:
                st.warning("Please select a column and enter a new column name.")
            else:
                renamed = [
                    new_column_name if col == column_select else col
                    for col in current
                ]
                if _introduces_duplicates(current, renamed):
                    st.warning("This change would create duplicate column names; no changes applied.")
                else:
                    st.session_state.modified_columns = renamed
                    df.columns = renamed
                    st.session_state["data"] = df
                    st.success("Column name updated.")
                    st.table(pd.DataFrame(df.columns, columns=['Modified Columns']))
|