Abubakari committed on
Commit
7fc994f
·
1 Parent(s): 0118122

Upload functions.py

Browse files
Files changed (1) hide show
  1. functions.py +63 -0
functions.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import pandas as pd
3
+ import streamlit as st
4
+
5
def df_info(df: pd.DataFrame) -> pd.DataFrame:
    """Build a per-column summary table (name, non-null count, dtype).

    The summary is computed directly from the frame rather than by parsing
    the text printed by ``DataFrame.info()``. Text parsing depends on a
    fixed number of header/footer lines and on whitespace splitting, so it
    fails for frames wide enough that ``info()`` omits the per-column table,
    for dtype names containing a space, and for non-string column labels.

    Side effect (kept for backward compatibility): spaces in string column
    labels of ``df`` are replaced with underscores **in place**, so the
    caller's frame is renamed too. Non-string labels are left untouched.

    Args:
        df: The frame to summarise. Its column labels are modified in place
            as described above.

    Returns:
        A new DataFrame with one row per column of ``df`` and the columns
        ``'Column'``, ``'Non-Null Count'`` and ``'Data Type'``. Counts and
        dtypes are stored as strings, matching what the text-parsing
        implementation produced.
    """
    # In-place rename so callers that rely on the underscored names keep
    # working; the isinstance guard avoids failing on integer/tuple labels.
    df.rename(
        columns=lambda label: label.replace(' ', '_') if isinstance(label, str) else label,
        inplace=True,
    )

    non_null_counts = df.count()
    return pd.DataFrame(
        data={
            'Column': list(df.columns),
            'Non-Null Count': [str(count) for count in non_null_counts],
            'Data Type': [str(dtype) for dtype in df.dtypes],
        }
    )
26
+
27
def df_isnull(df):
    """Tabulate the number and percentage of null values in each column.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A DataFrame with the columns ``'Column'``, ``'Number of null
        values'`` and ``'Percentage'``. The percentage is the null count
        relative to the number of rows, rounded to two decimals and
        rendered as a string with a trailing ``%``.
    """
    null_counts = df.isnull().sum()
    table = pd.DataFrame(null_counts).reset_index()

    row_total = df.shape[0]
    share = (table[0] / row_total * 100).round(2)
    table['Percentage'] = share.astype(str) + '%'

    renamed = {'index': 'Column', 0: 'Number of null values'}
    return table.rename(columns=renamed)
32
+
33
def number_of_outliers(df: pd.DataFrame) -> pd.DataFrame:
    """Count IQR-rule outliers in every numeric column of ``df``.

    A value counts as an outlier when it lies below ``Q1 - 1.5 * IQR`` or
    above ``Q3 + 1.5 * IQR``, where ``Q1``/``Q3`` are the column's 25th and
    75th percentiles and ``IQR = Q3 - Q1``.

    Only columns selected by ``select_dtypes(include='number')`` are
    analysed. Merely excluding ``object`` columns lets boolean and
    categorical columns through, for which the quantile/IQR arithmetic is
    not defined.

    Args:
        df: The DataFrame to analyse. It is not modified.

    Returns:
        A DataFrame with one row per analysed column and the columns
        ``'column'`` and ``'count_of_outliers'``. It has no rows when
        ``df`` has no numeric columns.
    """
    numeric = df.select_dtypes(include='number')

    # Nothing to analyse: return the same schema with no rows instead of
    # asking pandas for quantiles of a frame without columns.
    if numeric.shape[1] == 0:
        return pd.DataFrame({'column': [], 'count_of_outliers': []})

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1

    too_low = numeric < (q1 - 1.5 * iqr)
    too_high = numeric > (q3 + 1.5 * iqr)
    counts = (too_low | too_high).sum()

    # Built explicitly so the output column names do not depend on what
    # reset_index() happens to call the former index.
    return pd.DataFrame(
        {'column': list(counts.index), 'count_of_outliers': counts.to_numpy()}
    )
44
+
45
def space(num_lines=1):
    """Write ``num_lines`` empty strings to the main Streamlit page.

    Args:
        num_lines: How many times ``st.write("")`` is called (default 1).
    """
    emit = st.write
    for _line in range(num_lines):
        emit("")
48
+
49
def sidebar_space(num_lines=1):
    """Write ``num_lines`` empty strings to the Streamlit sidebar.

    Args:
        num_lines: How many times ``st.sidebar.write("")`` is called
            (default 1).
    """
    emit = st.sidebar.write
    for _line in range(num_lines):
        emit("")
52
+
53
+
54
def sidebar_multiselect_container(massage, arr, key):
    """Show a sidebar multiselect together with a "select all" checkbox.

    When the checkbox is ticked, every option is preselected; otherwise only
    the first option is preselected. If ``arr`` is empty nothing is
    preselected (indexing ``arr[0]`` unconditionally would raise
    ``IndexError`` in that case).

    Args:
        massage: Label passed to the multiselect widget.
        arr: The selectable options.
        key: Text inserted into the checkbox label
            ``"Select all for <key> plots"``.

    Returns:
        Whatever ``container.multiselect`` returns (the current selection).
    """
    options = list(arr)

    # The container is created before the checkbox but filled afterwards,
    # because the multiselect's default depends on the checkbox value.
    # NOTE(review): presumably this keeps the multiselect positioned ahead
    # of the checkbox in the sidebar; confirm against Streamlit's layout rules.
    container = st.sidebar.container()
    select_all = st.sidebar.checkbox("Select all for " + key + " plots")

    # A one-element list stands in for the former scalar default and
    # degrades to an empty default when there are no options.
    default = options if select_all else options[:1]
    return container.multiselect(massage, arr, default=default)