File size: 4,459 Bytes
47646a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f46bcdd
 
 
 
 
 
 
47646a2
f46bcdd
 
 
 
 
 
f3e356d
 
d831f3c
 
 
 
 
 
 
 
 
 
 
 
 
f3e356d
 
 
 
 
 
 
b1b9e97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d7944c
b1b9e97
 
 
 
 
 
 
 
 
 
f46bcdd
 
e5c5114
47646a2
 
 
7df2b4b
47646a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e46593a
47646a2
 
9eee39f
 
47646a2
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import gradio as gr
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def dataset_change(dataset):
    """Summarise an uploaded CSV and refresh the feature/hue dropdown choices.

    Args:
        dataset: Uploaded file object whose ``.name`` attribute is the path
            of the CSV to read, or ``None`` when no file is available
            (presumably what the File component sends once it is cleared).

    Returns:
        A 3-tuple ``(summary, feature_update, hue_update)``: the
        ``describe(include='all')`` table with its index turned into a
        regular column, followed by two dropdown updates that both list the
        CSV's column names.
    """
    if dataset is None:
        # Nothing to read: empty the table and reset both dropdowns instead
        # of failing on ``dataset.name``.
        return (
            pd.DataFrame(),
            gr.Dropdown.update(choices=[], value=None),
            gr.Dropdown.update(choices=[], value=None),
        )

    df = pd.read_csv(dataset.name)
    column_names = df.columns.tolist()
    describe = df.describe(include='all')
    print(describe)
    return (
        describe.reset_index(),
        gr.Dropdown.update(choices=column_names),
        gr.Dropdown.update(choices=column_names),
    )

def feature_select(dataset, feature, hue=None):
    """Build the two plots shown for the selected feature.

    Args:
        dataset: Uploaded file object whose ``.name`` attribute is the path
            of the CSV to read, or ``None`` when nothing has been uploaded.
        feature: Name of the column to plot.
        hue: Optional column name used to colour/group both plots.

    Returns:
        A 2-tuple ``(plot1, plot2)`` of matplotlib figures: a histogram of
        ``feature`` (with a KDE curve for non-object columns), and either a
        count plot (object-dtype columns) or a box plot (all other columns).
        ``(None, None)`` is returned when ``dataset`` is ``None``.
    """
    # Local import keeps this block self-contained; matplotlib is already a
    # dependency of this file via ``matplotlib.pyplot``.
    from matplotlib.figure import Figure

    if dataset is None:
        return None, None

    df = pd.read_csv(dataset.name)
    non_numeric_cols = df.select_dtypes('object').columns.tolist()
    is_categorical = feature in non_numeric_cols

    # Only forward ``hue`` when one was actually chosen.
    hue_kwargs = {"hue": hue} if hue else {}

    # Standalone Figure objects are not registered with pyplot, so they are
    # garbage-collected after rendering instead of accumulating on every
    # click; explicit axes also avoid relying on pyplot's "current figure".
    plot2 = Figure()
    detail_ax = plot2.subplots()
    if is_categorical:
        sns.countplot(x=feature, data=df, palette='rainbow', ax=detail_ax, **hue_kwargs)
    else:
        sns.boxplot(x=feature, data=df, ax=detail_ax, **hue_kwargs)

    plot1 = Figure()
    hist_ax = plot1.subplots()
    hist_kwargs = dict(hue_kwargs)
    if hue:
        hist_kwargs["multiple"] = "stack"
    # A KDE overlay is only requested for non-object columns.
    sns.histplot(data=df, x=feature, kde=not is_categorical, ax=hist_ax, **hist_kwargs)

    return plot1, plot2

# Custom stylesheet handed to gr.Blocks below.
# Raw string: the CSS selectors contain backslash escapes (\[ \] \:) that are
# not valid Python escape sequences and would otherwise trigger a
# DeprecationWarning/SyntaxWarning; the resulting text is unchanged.
css = r"""
footer {display:none !important}
.overflow-x-scroll {
    overflow-x: scroll !important;
    height: 15rem !important;
    overflow-y: scroll !important;
}

.max-h-\[30rem\] {max-height: 18rem !important;}

.hover\:bg-orange-50:hover {
    --tw-bg-opacity: 1 !important;
    background-color: rgb(229,225,255) !important;
}

.output-markdown h2{
    z-index: 14;
    align-self: flex-start;
    min-width: 0px;
    order: 5;
    min-height: 0px;
    height: max-content;
    flex-grow: 0;
    flex-shrink: 0;
    width: calc(100% - 0px);
    margin: 5px 0px;
    white-space: pre-wrap;
    overflow: visible;
    word-break: break-word;
    font-size: 18px !important;
    font-weight: 500 !important;
    color: rgb(9, 23, 71) !important;
    line-height: 1 !important;
    border-radius: 0px !important;
    opacity: 1 !important;
}

.gr-button-lg {
    z-index: 14;
    width: 113px !important;
    height: 30px !important;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important; 
    background: none rgb(17, 20, 45) !important;
    border: none !important;
    text-align: center !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 6px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: none !important;
}
.gr-button-lg:hover{
    z-index: 14;
    width: 113px !important;
    height: 30px !important;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important; 
    background: none rgb(66, 133, 244) !important;
    border: none !important;
    text-align: center !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 6px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
"""

# Page layout: upload -> summary table -> feature/hue pickers -> two plots.
with gr.Blocks(title="Describe Dataset | Data Science Dojo", css=css) as demo:
    # --- Input -------------------------------------------------------------
    gr.Markdown("## Input Dataset")
    with gr.Row():
        dataset = gr.File()

    # --- Summary table -----------------------------------------------------
    gr.Markdown("## Dataset Description")
    with gr.Row():
        dataframe = gr.Dataframe()

    # --- Feature / hue selection -------------------------------------------
    gr.Markdown("## Select the feature to visualize")
    with gr.Row():
        with gr.Column():
            features = gr.Dropdown(label="Select feature to visualize")
        with gr.Column():
            hue = gr.Dropdown(label="Select hue")
    with gr.Row():
        btn = gr.Button("Visualize")

    # --- Plots -------------------------------------------------------------
    gr.Markdown("## Visualization")
    with gr.Row():
        plot1 = gr.Plot()
    with gr.Row():
        plot2 = gr.Plot()

    # Bundled example file, run through the same handler as a user upload.
    gr.Examples(
        examples=[["boston.csv"]],
        fn=dataset_change,
        inputs=dataset,
        outputs=[dataframe, features, hue],
        cache_examples=True,
    )

    # A new upload refreshes the summary table and both dropdowns.
    dataset.change(
        fn=dataset_change,
        inputs=dataset,
        outputs=[dataframe, features, hue],
    )

    # The button renders both plots for the current selection.
    btn.click(
        fn=feature_select,
        inputs=[dataset, features, hue],
        outputs=[plot1, plot2],
    )

demo.launch(debug=True)