File size: 1,865 Bytes
5c802bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
import pandas as pd
import numpy as np
import io
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import base64




def show_general_data_statistics() -> None:
    """Render general summary statistics for the dataset held in session state.

    Reads the object stored under ``st.session_state["data"]`` (presumably a
    pandas DataFrame -- it must provide ``columns``, ``size``, ``isnull``,
    ``duplicated`` and ``dtypes``) and writes to the page:

    * the number of variables (columns) and rows,
    * the count and percentage of missing cells,
    * the count and percentage of duplicated rows,
    * how many columns there are of each dtype.

    When no dataset is stored under the ``"data"`` key, a warning is shown
    and nothing else is rendered.

    Percentages are reported as 0.00% for a dataset with no rows or no cells
    instead of dividing by zero.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    data = st.session_state["data"]
    num_var = len(data.columns)
    num_rows = len(data)
    total_cells = data.size

    # Cast the reduction results to plain ints so the counts always render as
    # whole numbers regardless of the scalar type the reduction returns.
    missing_cells = int(data.isnull().sum().sum())
    duplicate_rows = int(data.duplicated().sum())

    # Guard the denominators: an empty dataset would otherwise produce a
    # division by zero (shown as "nan%" or raised as an error).
    missing_cells_percent = (
        (missing_cells / total_cells) * 100 if total_cells else 0.0
    )
    duplicate_rows_percent = (
        (duplicate_rows / num_rows) * 100 if num_rows else 0.0
    )
    var_types = data.dtypes.value_counts()

    st.write("### General Data Statistics:")
    st.write(f"- **Number of Variables:**   {num_var}")
    st.write(f"- **Number of Rows:**    {num_rows}")
    st.write(f"- **Missing Cells:**     {missing_cells}")
    st.write(f"- **Missing Cells (%):**     {missing_cells_percent:.2f}%")
    st.write(f"- **Duplicate Rows:**    {duplicate_rows}")
    st.write(f"- **Duplicate Rows (%):**    {duplicate_rows_percent:.2f}%")
    st.write("#### Variable Types:")
    st.write(var_types)




def describe_data():
    """Show the ``describe()`` summary of the dataset held in session state.

    A page title is always rendered. If nothing is stored under the
    ``"data"`` key of ``st.session_state``, a warning is shown instead of
    the summary.
    """
    st.title("Describe Data")

    # Guard clause: without a stored dataset there is nothing to describe.
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    dataset = st.session_state["data"]
    st.write("Dataset Description:")
    summary = dataset.describe()
    st.write(summary)


def info_data():
    """Show the ``info()`` report of the dataset held in session state.

    A page title is always rendered. If nothing is stored under the
    ``"data"`` key of ``st.session_state``, a warning is shown instead of
    the report.
    """
    st.title("Dataset Info")

    # Guard clause: without a stored dataset there is nothing to report.
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    dataset = st.session_state["data"]

    # ``info`` writes its report to a stream, so capture it in an in-memory
    # text buffer and display the captured text on the page.
    report = io.StringIO()
    dataset.info(buf=report)
    st.text(report.getvalue())