File size: 12,328 Bytes
ac0793a
1d76c68
 
ac0793a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
import gradio as gr
import plotly.express as px
import pandas as pd
import io

# Fetch the default Gapminder table once, at import time, so there is data
# available before anything else is loaded or uploaded.
default_df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
)

# Module-level registry of loaded datasets keyed by display name (acts as our
# in-memory "database"); it starts out holding only the default dataset.
datasets = {'Gapminder': default_df}

# Function to load different built-in datasets
def load_builtin_dataset(dataset_name):
    """Load one of the built-in datasets and register it in ``datasets``.

    Args:
        dataset_name: One of "Gapminder", "Iris", "Tips", "Stock Data" or
            "Wind Data".

    Returns:
        A ``(dataframe, status_message)`` tuple. When the name is not a known
        built-in dataset, or loading raises, the dataframe is ``None`` and the
        message describes the problem.
    """
    def load_stocks():
        # Reshape from wide to long format for better analysis, and parse
        # the date column into real datetimes.
        wide = px.data.stocks()
        long_df = wide.melt(id_vars='date', var_name='company', value_name='stock_price')
        long_df['date'] = pd.to_datetime(long_df['date'])
        return long_df

    # Every entry is a zero-argument callable so that nothing is fetched or
    # built until the requested dataset has been picked.
    loaders = {
        "Gapminder": lambda: pd.read_csv(
            'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
        ),
        "Iris": lambda: px.data.iris(),
        "Tips": lambda: px.data.tips(),
        "Stock Data": load_stocks,
        "Wind Data": lambda: px.data.wind(),
    }

    loader = loaders.get(dataset_name)
    if loader is None:
        # Always hand back the same 2-tuple shape, even for an unknown name,
        # so callers can unpack the result unconditionally.
        return None, f"❌ Unknown dataset: {dataset_name}"

    try:
        df = loader()
    except Exception as e:
        # UI boundary: report the failure as a status message, do not crash.
        return None, f"❌ Error loading {dataset_name}: {str(e)}"

    datasets[dataset_name] = df
    return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"

# Function to handle file uploads
def upload_dataset(file, custom_name):
    """Read an uploaded CSV/Excel file and register it in ``datasets``.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.
        custom_name: Optional name to store the dataset under. When blank
            (``None``, empty or whitespace only) the file name without its
            extension is used.

    Returns:
        A ``(dataframe, status_message, selector_update)`` tuple. On success
        the selector update lists all dataset names and selects the new one;
        on failure the dataframe is ``None``.
    """
    from pathlib import Path  # local import keeps this function self-contained

    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

    try:
        # Accept both file-like objects with a ``.name`` path and bare paths.
        path = Path(getattr(file, "name", file))
        lowered_name = path.name.lower()

        # Determine file type (case-insensitively) and read accordingly
        if lowered_name.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered_name.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

        # Store with custom name or, failing that, the file's stem. Path
        # handles the platform's separators and names containing extra dots.
        dataset_name = (custom_name or "").strip() or path.stem
        datasets[dataset_name] = df

        return (
            df,
            f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns",
            gr.update(choices=list(datasets.keys()), value=dataset_name),
        )

    except Exception as e:
        # UI boundary: surface the problem as a status message.
        return None, f"❌ Error reading file: {str(e)}", gr.update()

# Function to switch between datasets
def switch_dataset(dataset_name):
    """Look up ``dataset_name`` in ``datasets`` and build the values for the UI.

    Returns a 7-tuple: a 10-row preview, a Markdown summary, updates for the
    X-axis, Y-axis, color and size dropdowns, and the full dataframe. When the
    name is not registered the tuple is ``None``, "Dataset not found", four
    no-op updates and ``None``.
    """
    # Guard clause: an unknown name leaves every dropdown untouched.
    if dataset_name not in datasets:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

    frame = datasets[dataset_name]

    # Column groups that feed the dropdown choices below
    every_col = frame.columns.tolist()
    number_cols = frame.select_dtypes(include=['number']).columns.tolist()
    category_cols = frame.select_dtypes(include=['object', 'category']).columns.tolist()

    # The summary lists at most five names per group, then an ellipsis
    number_listing = ', '.join(number_cols[:5]) + ('...' if len(number_cols) > 5 else '')
    category_listing = ', '.join(category_cols[:5]) + ('...' if len(category_cols) > 5 else '')

    summary = f"""
        ### Dataset: {dataset_name}
        - **Rows**: {len(frame)}
        - **Columns**: {len(frame.columns)}
        - **Numeric columns**: {number_listing}
        - **Categorical columns**: {category_listing}
        """

    first_col = every_col[0] if every_col else None
    first_number_col = number_cols[0] if number_cols else None

    preview = frame.head(10)
    x_update = gr.update(choices=every_col, value=first_col)
    y_update = gr.update(choices=number_cols, value=first_number_col)
    # Color and size are optional, so each gets a leading blank choice
    color_update = gr.update(choices=[""] + category_cols, value="")
    size_update = gr.update(choices=[""] + number_cols, value="")

    return preview, summary, x_update, y_update, color_update, size_update, frame

# Dynamic plotting function
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Create a Plotly figure for the current dataset and selections.

    Args:
        df: The dataframe to plot, or ``None`` when no dataset is active.
        plot_type: "Scatter", "Line", "Bar", "Histogram", "Box" or "Heatmap".
        x_col: Column for the X axis; ``None`` means nothing is selected.
        y_col: Column for the Y axis (not used by Histogram and Heatmap).
        color_col: Optional column to color by; blank means none.
        size_col: Optional column for marker size (Scatter only); blank
            means none.

    Returns:
        The figure, or ``None`` when there is nothing to plot: no dataframe,
        no X column, an unsupported plot type, a heatmap request with fewer
        than two numeric columns, or an error raised while plotting (the
        error is printed).
    """
    if df is None or x_col is None:
        return None

    # A cleared optional selector may arrive as "", None or an empty list;
    # all of them mean "not set". Column labels such as 0 are kept as-is.
    blank_values = ("", [])
    if color_col is None or color_col in blank_values:
        color_col = None
    if size_col is None or size_col in blank_values:
        size_col = None

    try:
        if plot_type == "Scatter":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                             title=f"Scatter: {x_col} vs {y_col}")

        elif plot_type == "Line":
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"Line: {x_col} vs {y_col}")

        elif plot_type == "Bar":
            # color=None is px.bar's default, so one call covers both the
            # colored and the uncolored case.
            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                         title=f"Bar: {x_col} vs {y_col}")

        elif plot_type == "Histogram":
            fig = px.histogram(df, x=x_col, color=color_col,
                               title=f"Histogram of {x_col}")

        elif plot_type == "Box":
            fig = px.box(df, x=x_col, y=y_col, color=color_col,
                         title=f"Box plot: {x_col} vs {y_col}")

        elif plot_type == "Heatmap":
            # Correlation matrix over the numeric columns; the axis
            # selections are ignored for this plot type.
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) < 2:
                return None
            corr = numeric_df.corr()
            fig = px.imshow(corr, text_auto=True, title="Correlation Heatmap")

        else:
            # Handle this explicitly instead of relying on an unbound ``fig``
            # being swallowed by the except clause below.
            print(f"Plot error: unsupported plot type {plot_type!r}")
            return None

        fig.update_layout(height=500)
        return fig

    except Exception as e:
        # Best effort: a bad column combination should not take the UI down.
        print(f"Plot error: {e}")
        return None

# Create the Gradio interface
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📊 Dynamic Dataset Explorer
    Upload your own data or explore built-in datasets with automatic visualization
    """)

    # Hidden state to store current dataframe; plots and analyses read from it
    current_df = gr.State(value=default_df)

    with gr.Tabs():
        # Tab 1: Dataset Management
        with gr.TabItem("📁 Dataset Management"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset"
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")

                    gr.Markdown("### Active Datasets")
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset"
                    )

                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

        # Tab 2: Dynamic Visualization
        with gr.TabItem("📈 Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type"
                    )

                    # Choices start empty; they are filled in by switch_dataset
                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)

                    plot_btn = gr.Button("Create Plot", variant="primary")

                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")

        # Tab 3: Data Analysis
        with gr.TabItem("🔍 Data Analysis"):
            with gr.Row():
                with gr.Column():
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")

                with gr.Column():
                    analysis_output = gr.Markdown()

            def analyze_data(df, analysis_type):
                """Return a Markdown report of the chosen analysis for ``df``.

                Tabular results are wrapped in a fenced code block so their
                column alignment survives Markdown rendering.
                """
                if df is None:
                    return "No dataset loaded"

                if analysis_type == "Summary Statistics":
                    return f"```\n{df.describe().to_string()}\n```"
                elif analysis_type == "Missing Values":
                    missing = df.isnull().sum()
                    # Only list the columns that actually have gaps
                    return f"```\n{missing[missing > 0].to_string()}\n```" if missing.any() else "No missing values!"
                elif analysis_type == "Data Types":
                    return f"```\n{df.dtypes.to_string()}\n```"
                elif analysis_type == "Unique Values":
                    unique_counts = df.nunique()
                    return f"```\n{unique_counts.to_string()}\n```"
                # Always return text so the output component gets a message
                return f"Unknown analysis type: {analysis_type}"

    def refresh_selector(loaded_name):
        """Refresh the dataset selector after a built-in load attempt.

        When ``loaded_name`` is registered it is also selected, so the
        selector's change handler makes it the active dataset (mirroring
        what an upload does). Otherwise only the choices are refreshed.
        """
        names = list(datasets.keys())
        if loaded_name in datasets:
            return gr.update(choices=names, value=loaded_name)
        return gr.update(choices=names)

    # Event handlers
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg]
    ).then(
        refresh_selector,
        inputs=[builtin_choice],
        outputs=[dataset_selector]
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector]
    )

    # When dataset is switched, update everything
    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=[data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    )

    # Create plot based on selections
    plot_btn.click(
        create_plot,
        inputs=[current_df, plot_type, x_axis, y_axis, color_by, size_by],
        outputs=[plot_output]
    )

    # Auto-update plot when parameters change
    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
        component.change(
            create_plot,
            inputs=[current_df, plot_type, x_axis, y_axis, color_by, size_by],
            outputs=[plot_output]
        )

    # Analysis
    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output]
    )

    # Load initial dataset so the preview and axis dropdowns are populated
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=[data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    )

if __name__ == "__main__":
    # Launch the UI only when this file is executed as a script (not when it
    # is imported), with sharing disabled and debug mode enabled.
    demo.launch(share=False, debug=True)