Spaces:
Build error
Build error
Upload functions.py
Browse files- functions.py +63 -0
functions.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
def df_info(df):
    """Summarise each column of ``df`` as a small report DataFrame.

    The report carries the same per-column facts as the table printed by
    ``DataFrame.info()`` (name, non-null count, dtype), but it is computed
    directly from the frame rather than by parsing that printed text. Parsing
    silently produced an empty report whenever ``info()`` collapsed the table
    into its summary form (wide frames) and failed on labels that are not
    plain strings.

    Side effect (kept for backward compatibility): spaces in the string
    column labels of ``df`` are replaced with underscores *in place*, so the
    caller's frame is renamed as well.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        ``'Column'``, ``'Non-Null Count'`` and ``'Data Type'``. All three
        columns hold strings, as the text-parsing implementation did.
    """

    def _underscored(label):
        # Only string labels can contain spaces; leave other labels untouched.
        return label.replace(' ', '_') if isinstance(label, str) else label

    df.rename(columns=_underscored, inplace=True)

    return pd.DataFrame(
        data={
            'Column': [str(label) for label in df.columns],
            'Non-Null Count': [str(count) for count in df.count()],
            'Data Type': [str(dtype) for dtype in df.dtypes],
        }
    )
|
| 26 |
+
|
| 27 |
+
def df_isnull(df):
    """Report the number and percentage of null values in each column.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        ``'Column'`` (the column label), ``'Number of null values'`` (count
        of nulls) and ``'Percentage'`` (share of rows that are null, rounded
        to two decimals and formatted as a string ending in ``'%'``).
        A frame without rows reports ``'0.0%'`` for every column.
    """
    null_counts = df.isnull().sum()
    row_count = df.shape[0]

    if row_count:
        percentages = (null_counts / row_count * 100).round(2)
    else:
        # No rows means no nulls; avoid the 0/0 division that yields 'nan%'.
        percentages = null_counts.astype(float)

    # Build the result explicitly instead of reset_index()/rename(): the
    # auto-generated 'index' column name changes when df.columns has a name,
    # which made the rename to 'Column' silently miss.
    return pd.DataFrame(
        {
            'Column': list(null_counts.index),
            'Number of null values': null_counts.tolist(),
            'Percentage': (percentages.astype(str) + '%').tolist(),
        }
    )
|
| 32 |
+
|
| 33 |
+
def number_of_outliers(df):
    """Count outliers per numeric column using the 1.5 * IQR rule.

    A value is counted as an outlier when it lies below ``Q1 - 1.5 * IQR``
    or above ``Q3 + 1.5 * IQR``, where ``Q1``/``Q3`` are the column's 25th
    and 75th percentiles and ``IQR = Q3 - Q1``.

    Only numeric columns are considered. Selecting by "not object" let
    category, boolean and string-dtype columns through, and quantiles or
    the fence arithmetic are not defined for all of those.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A DataFrame with the columns ``'column'`` (label of a numeric
        column) and ``'count_of_outliers'`` (number of outlying values in
        it), one row per numeric column. Empty when ``df`` has no numeric
        columns.
    """
    numeric = df.select_dtypes(include='number')

    if numeric.shape[1] == 0:
        # Nothing to measure; skip quantile(), which some pandas versions
        # reject on a frame without columns.
        return pd.DataFrame({'column': [], 'count_of_outliers': []})

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1

    below_lower_fence = numeric < (q1 - 1.5 * iqr)
    above_upper_fence = numeric > (q3 + 1.5 * iqr)
    counts = (below_lower_fence | above_upper_fence).sum()

    # Built explicitly rather than via reset_index()/rename(), whose
    # generated column name depends on whether df.columns is named.
    return pd.DataFrame(
        {
            'column': list(counts.index),
            'count_of_outliers': counts.tolist(),
        }
    )
|
| 44 |
+
|
| 45 |
+
def space(num_lines=1):
    """Call ``st.write("")`` ``num_lines`` times on the main page.

    Args:
        num_lines: How many empty writes to emit (defaults to 1).
    """
    repetitions = range(num_lines)
    for _unused in repetitions:
        st.write("")
|
| 48 |
+
|
| 49 |
+
def sidebar_space(num_lines=1):
    """Call ``st.sidebar.write("")`` ``num_lines`` times.

    Args:
        num_lines: How many empty writes to emit in the sidebar
            (defaults to 1).
    """
    repetitions = range(num_lines)
    for _unused in repetitions:
        st.sidebar.write("")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def sidebar_multiselect_container(massage, arr, key):
    """Render a sidebar multiselect with a companion "select all" checkbox.

    The container is created before the checkbox and the multiselect is
    placed inside it, while the multiselect's default selection depends on
    the checkbox value.

    Args:
        massage: Label shown on the multiselect (parameter name kept as-is
            for backward compatibility with keyword callers).
        arr: The selectable options.
        key: Text inserted into the checkbox label
            ("Select all for <key> plots").

    Returns:
        The value returned by the multiselect widget (the current selection).
    """
    container = st.sidebar.container()
    select_all = st.sidebar.checkbox(f"Select all for {key} plots")

    options = list(arr)
    # Everything when "select all" is ticked; otherwise just the first
    # option. Slicing keeps this safe when there are no options at all and
    # for containers where arr[0] is a label lookup rather than a position.
    preselected = options if select_all else options[:1]

    return container.multiselect(massage, arr, default=preselected)
|