import gradio as gr
import plotly.express as px
import pandas as pd
import io
# In-memory registry of every loaded dataset, keyed by display name
# (acts as our "database"). It starts out holding only the default dataset.
default_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
datasets = {'Gapminder': default_df}
# Function to load different built-in datasets
def load_builtin_dataset(dataset_name):
    """Load a built-in dataset by name and register it in ``datasets``.

    Args:
        dataset_name: One of "Gapminder", "Iris", "Tips", "Stock Data" or
            "Wind Data".

    Returns:
        A ``(dataframe, status_message)`` tuple. For an unknown name or a
        loading failure the dataframe is ``None`` and the message describes
        the problem, so callers always receive two values.
    """

    def _load_stocks():
        # Reshape from wide to long format for better analysis
        wide_df = px.data.stocks()
        long_df = wide_df.melt(id_vars='date', var_name='company', value_name='stock_price')
        long_df['date'] = pd.to_datetime(long_df['date'])
        return long_df

    # Each entry is a zero-argument callable so that nothing is fetched (and
    # no px/pd attribute is resolved) until the chosen loader runs inside the
    # try block below.
    loaders = {
        "Gapminder": lambda: pd.read_csv(
            'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
        ),
        "Iris": lambda: px.data.iris(),
        "Tips": lambda: px.data.tips(),
        "Stock Data": _load_stocks,
        "Wind Data": lambda: px.data.wind(),
    }

    try:
        loader = loaders.get(dataset_name)
        if loader is None:
            # Previously an unknown name fell through and returned a bare None.
            return None, f"❌ Unknown dataset: {dataset_name}"
        df = loader()
        datasets[dataset_name] = df
        return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
    except Exception as e:
        return None, f"❌ Error loading {dataset_name}: {str(e)}"
# Function to handle file uploads
def upload_dataset(file, custom_name):
    """Read an uploaded CSV/Excel file and register it in ``datasets``.

    Args:
        file: The uploaded file: either an object exposing its path as
            ``.name`` or the path itself as a string. ``None`` means nothing
            was uploaded.
        custom_name: Optional name to register the dataset under. When it is
            empty or only whitespace, the file name without its extension is
            used instead.

    Returns:
        A ``(dataframe, status_message, selector_update)`` tuple. The
        dataframe is ``None`` when nothing was read; the selector update
        carries the refreshed dataset list (and selects the new dataset) on
        success and is a no-op update on failure.
    """
    from pathlib import Path  # local import keeps this function self-contained

    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))
    try:
        path = Path(getattr(file, "name", file))
        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        suffix = path.suffix.lower()
        if suffix == '.csv':
            df = pd.read_csv(path)
        elif suffix in ('.xlsx', '.xls'):
            df = pd.read_excel(path)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()
        # Store with custom name or filename. ``stem`` drops only the final
        # extension, so "my.data.csv" is registered as "my.data".
        dataset_name = (custom_name or "").strip() or path.stem
        datasets[dataset_name] = df
        return (
            df,
            f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns",
            gr.update(choices=list(datasets.keys()), value=dataset_name),
        )
    except Exception as e:
        return None, f"❌ Error reading file: {str(e)}", gr.update()
# Function to switch between datasets
def switch_dataset(dataset_name):
    """Make ``dataset_name`` the active dataset and refresh dependent controls.

    Returns a 7-tuple: preview rows, Markdown info text, updates for the
    X-axis, Y-axis, color and size dropdowns, and the dataframe to store as
    the current one. For an unknown name the preview and stored dataframe are
    ``None`` and the dropdown updates are no-ops.
    """
    if dataset_name not in datasets:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

    frame = datasets[dataset_name]
    every_col = frame.columns.tolist()
    number_cols = frame.select_dtypes(include=['number']).columns.tolist()
    label_cols = frame.select_dtypes(include=['object', 'category']).columns.tolist()

    def _preview(names):
        # List at most five names, with an ellipsis when more exist.
        return f"{', '.join(names[:5])}{'...' if len(names) > 5 else ''}"

    # The leading and trailing empty entries reproduce the blank first line
    # and the final newline of the info text.
    info = "\n".join([
        "",
        f"### Dataset: {dataset_name}",
        f"- **Rows**: {len(frame)}",
        f"- **Columns**: {len(frame.columns)}",
        f"- **Numeric columns**: {_preview(number_cols)}",
        f"- **Categorical columns**: {_preview(label_cols)}",
        "",
    ])

    x_default = every_col[0] if every_col else None
    y_default = number_cols[0] if number_cols else None

    return (
        frame.head(10),                                        # Preview
        info,                                                  # Info
        gr.update(choices=every_col, value=x_default),         # X-axis
        gr.update(choices=number_cols, value=y_default),       # Y-axis
        gr.update(choices=[""] + label_cols, value=""),        # Color
        gr.update(choices=[""] + number_cols, value=""),       # Size
        frame,                                                 # Store current df
    )
# Dynamic plotting function
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Create a plotly figure for the current dataset and selections.

    Args:
        df: The active dataframe, or ``None`` when no dataset is loaded.
        plot_type: One of "Scatter", "Line", "Bar", "Histogram", "Box" or
            "Heatmap".
        x_col: Column for the X axis; ``None`` disables plotting.
        y_col: Column for the Y axis (unused by Histogram and Heatmap).
        color_col: Column used for coloring; ``""`` means no selection.
        size_col: Column used for marker size (Scatter only); ``""`` means
            no selection.

    Returns:
        The figure, or ``None`` when there is nothing to plot, the plot type
        is not supported, or plotting failed (the error is printed).
    """
    if df is None or x_col is None:
        return None
    try:
        # Handle empty string selections
        color_col = None if color_col == "" else color_col
        size_col = None if size_col == "" else size_col

        # Create different plot types
        if plot_type == "Scatter":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                             title=f"Scatter: {x_col} vs {y_col}")
        elif plot_type == "Line":
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"Line: {x_col} vs {y_col}")
        elif plot_type == "Bar":
            # color=None is plotly's default, so no separate uncolored call is needed.
            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                         title=f"Bar: {x_col} vs {y_col}")
        elif plot_type == "Histogram":
            fig = px.histogram(df, x=x_col, color=color_col,
                               title=f"Histogram of {x_col}")
        elif plot_type == "Box":
            fig = px.box(df, x=x_col, y=y_col, color=color_col,
                         title=f"Box plot: {x_col} vs {y_col}")
        elif plot_type == "Heatmap":
            # Create correlation matrix for numeric columns
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) <= 1:
                # A correlation matrix needs at least two numeric columns.
                return None
            fig = px.imshow(numeric_df.corr(), text_auto=True, title="Correlation Heatmap")
        else:
            # Report the real cause instead of relying on an unbound ``fig``
            # being caught by the handler below.
            print(f"Plot error: unsupported plot type {plot_type!r}")
            return None

        fig.update_layout(height=500)
        return fig
    except Exception as e:
        print(f"Plot error: {e}")
        return None
# Create the Gradio interface
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the leading "π" in the heading and tab labels looks like a
    # mis-decoded emoji, and the layout nesting below was reconstructed from
    # the order of the calls — confirm both against the intended design.
    gr.Markdown("""
# π Dynamic Dataset Explorer
Upload your own data or explore built-in datasets with automatic visualization
""")

    # Hidden state to store current dataframe
    current_df = gr.State(value=default_df)

    with gr.Tabs():
        # Tab 1: Dataset Management
        with gr.TabItem("π Dataset Management"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset"
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")

                    gr.Markdown("### Active Datasets")
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset"
                    )
                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

        # Tab 2: Dynamic Visualization
        with gr.TabItem("π Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type"
                    )
                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)
                    plot_btn = gr.Button("Create Plot", variant="primary")
                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")

        # Tab 3: Data Analysis
        with gr.TabItem("π Data Analysis"):
            with gr.Row():
                with gr.Column():
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")
                with gr.Column():
                    analysis_output = gr.Markdown()

    def analyze_data(df, analysis_type):
        """Return a Markdown report for the chosen analysis of ``df``.

        Always returns a string: a fenced text table for a successful
        analysis, or a short message when there is no dataset, nothing to
        report, or the analysis type is not recognised.
        """
        if df is None:
            return "No dataset loaded"
        if analysis_type == "Summary Statistics":
            if len(df.columns) == 0:
                # describe() raises ValueError on a frame without columns.
                return "No columns to summarize"
            return f"```\n{df.describe().to_string()}\n```"
        if analysis_type == "Missing Values":
            missing = df.isnull().sum()
            return f"```\n{missing[missing > 0].to_string()}\n```" if missing.any() else "No missing values!"
        if analysis_type == "Data Types":
            return f"```\n{df.dtypes.to_string()}\n```"
        if analysis_type == "Unique Values":
            unique_counts = df.nunique()
            return f"```\n{unique_counts.to_string()}\n```"
        return f"Unknown analysis type: {analysis_type}"

    def refresh_selector(loaded_name):
        """Refresh the dataset list and select ``loaded_name`` if it is registered.

        Selecting the name changes the dropdown's value, which is what lets
        the selector's change listener below activate the dataset; a name
        that failed to load only refreshes the list.
        """
        names = list(datasets.keys())
        if loaded_name in datasets:
            return gr.update(choices=names, value=loaded_name)
        return gr.update(choices=names)

    # Component groups shared by several listeners below.
    dataset_view_outputs = [data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    plot_inputs = [current_df, plot_type, x_axis, y_axis, color_by, size_by]

    # Event handlers
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg]
    ).then(
        # Make the freshly loaded dataset the active one, as upload does;
        # previously only the list was refreshed, so plots and analysis kept
        # using the old dataframe while the preview showed the new one.
        refresh_selector,
        inputs=[builtin_choice],
        outputs=[dataset_selector]
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector]
    )

    # When dataset is switched, update everything
    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=dataset_view_outputs
    )

    # Create plot based on selections
    plot_btn.click(
        create_plot,
        inputs=plot_inputs,
        outputs=[plot_output]
    )

    # Auto-update plot when parameters change
    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
        component.change(
            create_plot,
            inputs=plot_inputs,
            outputs=[plot_output]
        )

    # Analysis
    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output]
    )

    # Load initial dataset
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=dataset_view_outputs
    )
if __name__ == "__main__":
    # Start the app only when this file is run as a script; no public share
    # link is requested.
    demo.launch(share=False, debug=True)