Khang Nguyen
initial commit
aa893a9
import gradio as gr
import pandas as pd
from data_processor import (
load_data,
get_basic_info,
detect_column_types,
numeric_summary,
categorical_summary,
missing_values_report,
correlation_matrix,
build_filter_metadata,
apply_filters,
)
from visualizations import (
create_time_series_plot,
create_distribution_plot,
create_category_bar_plot,
create_scatter_plot,
create_correlation_heatmap,
)
from insights import (
region_ranking,
top_bottom_groups,
model_production_vs_delivery,
overall_trend_summary,
)
from utils import figure_to_png_bytes, dict_to_text
# -----------------------------
# Helper functions for loading
# -----------------------------
def load_any_source(file_or_path):
    """Load a dataset and derive everything the dashboard needs from it.

    The argument is handed to ``load_data`` unchanged; the dashboard passes
    either the upload widget's value or the path of a bundled sample CSV.

    Returns:
        A 6-tuple ``(df, meta, col_types, info, preview, error_message)``.
        If ``load_data`` reports an error, the first five items are ``None``
        and the last one is that error. Otherwise the error slot is ``None``
        and ``preview`` is the first 20 rows of ``df``.
    """
    frame, problem = load_data(file_or_path)
    if problem:
        return None, None, None, None, None, problem

    # Derived artefacts, computed once per load and cached by the caller.
    basic_info = get_basic_info(frame)
    column_types = detect_column_types(frame)
    filter_meta = build_filter_metadata(frame, column_types)
    first_rows = frame.head(20)

    return frame, filter_meta, column_types, basic_info, first_rows, None
# ============================================================
# Gradio app
# ============================================================
def create_dashboard():
    """Assemble the dashboard's Gradio ``Blocks`` application.

    Declares the shared state, the five tabs and the callbacks that connect
    them, and returns the ``gr.Blocks`` object so the caller can launch it.
    """
    soft_theme = gr.themes.Soft()
    with gr.Blocks(theme=soft_theme) as demo:
        # Page title and a short description of what the app offers.
        gr.Markdown(value="# 🚗 Tesla Production & Deliveries Dashboard")
        intro_text = (
            "Upload data or use the built-in Tesla samples to explore statistics, "
            "filter data, visualize trends, and generate insights."
        )
        gr.Markdown(value=intro_text)

        # State holders read by the callbacks of every tab. All start empty
        # and are filled in when a dataset is loaded.
        df_state = gr.State(value=None)
        meta_state = gr.State(value=None)
        col_types_state = gr.State(value=None)
# ----------------------------------------------------
# 1. DATA UPLOAD TAB
# ----------------------------------------------------
with gr.Tab(label="Data Upload"):
    gr.Markdown(
        value="### Upload your own file or use one of the sample Tesla datasets"
    )

    # Input side: a file picker plus one button per data source.
    file_input = gr.File(label="Upload CSV or Excel File")
    load_btn = gr.Button(value="Load Uploaded Data")
    sample_small_btn = gr.Button(value="Load Tesla Sample (1K rows)")
    sample_large_btn = gr.Button(value="Load Tesla Sample (50K rows)")

    # Output side: dataset facts, a preview table and a status message.
    info_box = gr.JSON(label="Dataset Info")
    preview_box = gr.Dataframe(
        interactive=False,
        label="Preview (first 20 rows)",
    )
    error_box = gr.Markdown(label="Messages")
# ----------------------------------------------------
# 2. STATISTICS TAB
# ----------------------------------------------------
with gr.Tab(label="Statistics"):
    gr.Markdown(value="### Summary Statistics")

    # Every result widget starts hidden; the statistics callback makes them
    # visible once it has something to show.
    stats_table_opts = {"interactive": False, "visible": False}
    num_stats = gr.Dataframe(label="Numeric Summary", **stats_table_opts)
    cat_stats = gr.Dataframe(label="Categorical Summary", **stats_table_opts)
    missing_stats = gr.Dataframe(label="Missing Values", **stats_table_opts)
    corr_plot = gr.Plot(visible=False, label="Correlation Heatmap")

    show_stats_btn = gr.Button(value="Generate Statistics")
# ----------------------------------------------------
# 3. FILTER & EXPLORE TAB
# ----------------------------------------------------
with gr.Tab(label="Filter & Explore"):
    gr.Markdown(value="### Apply Filters to Explore the Dataset")

    # Numeric range: a column picker plus lower and upper bounds.
    gr.Markdown(value="**Numeric Filter (optional)**")
    with gr.Row():
        filt_num_col = gr.Dropdown(
            choices=[], interactive=True, label="Numeric Column"
        )
        filt_num_min = gr.Number(label="Min value")
        filt_num_max = gr.Number(label="Max value")

    # Categorical membership: a column picker plus the values to keep.
    gr.Markdown(value="**Categorical Filter (optional)**")
    with gr.Row():
        filt_cat_col = gr.Dropdown(
            choices=[], interactive=True, label="Categorical Column"
        )
        filt_cat_vals = gr.Dropdown(
            choices=[],
            multiselect=True,
            interactive=True,
            label="Allowed Values",
        )

    # Date range: a column picker plus start and end dates typed as text.
    gr.Markdown(value="**Date Filter (optional)**")
    with gr.Row():
        filt_date_col = gr.Dropdown(
            choices=[], interactive=True, label="Date Column"
        )
        filt_date_start = gr.Textbox(label="Start date (YYYY-MM-DD)")
        filt_date_end = gr.Textbox(label="End date (YYYY-MM-DD)")

    # Actions: preview the filtered rows, or export them as a file.
    apply_filters_btn = gr.Button(value="Apply Filters")
    filtered_preview = gr.Dataframe(
        visible=False,
        interactive=False,
        label="Filtered Data Preview",
    )
    export_filtered_btn = gr.Button(value="Export Filtered Data")
    filtered_download = gr.File(label="Download CSV")
# ----------------------------------------------------
# 4. VISUALIZATIONS TAB
# ----------------------------------------------------
with gr.Tab(label="Visualizations"):
    gr.Markdown(value="### Create Charts")

    # Chart kinds understood by the chart callback.
    chart_kinds = [
        "Time Series",
        "Histogram",
        "Box Plot",
        "Category Bar",
        "Scatter",
        "Correlation Heatmap",
    ]
    chart_type = gr.Dropdown(
        choices=chart_kinds,
        value="Time Series",
        label="Chart Type",
    )

    # Column pickers; their choices stay empty until a dataset is loaded.
    x_col = gr.Dropdown(
        choices=[], interactive=True, label="X Column (for Scatter)"
    )
    y_col = gr.Dropdown(
        choices=[], interactive=True, label="Y Column (for Scatter)"
    )
    date_col = gr.Dropdown(
        choices=[], interactive=True, label="Date Column (for Time Series)"
    )
    value_col = gr.Dropdown(choices=[], interactive=True, label="Value Column")
    category_col = gr.Dropdown(
        choices=[], interactive=True, label="Category Column (optional)"
    )

    # Aggregation and resampling options.
    aggregations = ["sum", "mean", "count", "median"]
    agg_func = gr.Dropdown(
        choices=aggregations,
        value="sum",
        label="Aggregation (for Time Series / Category Bar)",
    )
    resample_freqs = ["M", "Q", "Y"]
    freq = gr.Dropdown(
        choices=resample_freqs,
        value="Q",
        label="Resample Frequency (Time Series)",
    )

    # Trigger plus the two outputs: the plot and a downloadable file.
    viz_btn = gr.Button(value="Generate Chart")
    viz_plot = gr.Plot(label="Visualization")
    viz_download = gr.File(label="Download PNG")
# ----------------------------------------------------
# 5. INSIGHTS TAB
# ----------------------------------------------------
with gr.Tab(label="Insights"):
    gr.Markdown(value="### Automatic Insights")

    # Result tables are read-only and hidden until insights are generated.
    insight_table_opts = {"interactive": False, "visible": False}

    gr.Markdown(value="#### Region Ranking (by Estimated Deliveries)")
    region_rank_tb = gr.Dataframe(
        label="Regions ranked by total Estimated Deliveries",
        **insight_table_opts,
    )

    gr.Markdown(value="#### Top / Bottom Models (by Estimated Deliveries)")
    model_top_tb = gr.Dataframe(label="Top Models", **insight_table_opts)
    model_bottom_tb = gr.Dataframe(label="Bottom Models", **insight_table_opts)

    gr.Markdown(value="#### Model Production vs Estimated Deliveries")
    model_comp_tb = gr.Dataframe(
        label="Production vs Deliveries by Model",
        **insight_table_opts,
    )

    # Free-text summary; shows a hint until the first run.
    trend_text = gr.Textbox(
        value="Click 'Generate Insights' after loading a dataset.",
        label="Overall Trend Summary",
        interactive=False,
        lines=10,
    )

    insights_btn = gr.Button(value="Generate Insights")
# =====================================================
# CALLBACK IMPLEMENTATIONS
# =====================================================
# ---------- Load handlers ----------
def handle_load(source):
    """Load ``source`` and return one value per component wired to the load buttons.

    Args:
        source: Passed unchanged to ``load_any_source``; the callers supply
            the upload widget's value or the path of a bundled sample CSV.

    Returns:
        A 14-tuple, in the order the load buttons list their outputs: the
        three shared states (DataFrame, filter metadata, column types), the
        dataset info, the preview, the status message, then updates for the
        eight column dropdowns (three filter pickers and five chart pickers).
        On failure the states and the preview are ``None``, the info box
        receives ``{"error": <message>}`` and every dropdown is emptied.
    """
    df, meta, col_types, info, preview, err = load_any_source(source)

    def choices_update(options):
        # Builds a new update on every call so that no two output components
        # are ever handed the same update object.
        return gr.update(choices=options, value=None)

    if err:
        # Clear the states and dropdowns so nothing from an earlier successful
        # load stays selectable after a failed one.
        return (
            None,  # df_state
            None,  # meta_state
            None,  # col_types_state
            {"error": err},  # info_box
            None,  # preview_box
            f"❌ {err}",  # error_box
            choices_update([]),  # filt_num_col
            choices_update([]),  # filt_cat_col
            choices_update([]),  # filt_date_col
            choices_update([]),  # x_col
            choices_update([]),  # y_col
            choices_update([]),  # date_col
            choices_update([]),  # value_col
            choices_update([]),  # category_col
        )

    # Normal case: offer each dropdown the columns of the matching type and
    # reset any previous selection.
    numeric_cols = col_types["numeric"]
    categorical_cols = col_types["categorical"]
    date_cols = col_types["date"]

    return (
        df,  # df_state
        meta,  # meta_state
        col_types,  # col_types_state
        info,  # info_box
        preview,  # preview_box
        "✅ Data loaded successfully.",  # error_box
        choices_update(numeric_cols),  # filt_num_col
        choices_update(categorical_cols),  # filt_cat_col
        choices_update(date_cols),  # filt_date_col
        choices_update(numeric_cols),  # x_col
        choices_update(numeric_cols),  # y_col
        choices_update(date_cols),  # date_col
        choices_update(numeric_cols),  # value_col
        choices_update(categorical_cols),  # category_col
    )
# All three load paths update the same components, listed in the order in
# which handle_load returns its values.
load_outputs = [
    df_state,
    meta_state,
    col_types_state,
    info_box,
    preview_box,
    error_box,
    filt_num_col,
    filt_cat_col,
    filt_date_col,
    x_col,
    y_col,
    date_col,
    value_col,
    category_col,
]

# Uploaded file.
load_btn.click(fn=handle_load, inputs=[file_input], outputs=load_outputs)


def load_sample_small():
    """Load the sample CSV behind the "1K rows" button."""
    return handle_load("data/tesla_deliveries_1k.csv")


def load_sample_large():
    """Load the sample CSV behind the "50K rows" button."""
    return handle_load("data/tesla_deliveries_50k.csv")


# Bundled samples: no inputs, the path is fixed inside each loader.
sample_small_btn.click(fn=load_sample_small, inputs=[], outputs=load_outputs)
sample_large_btn.click(fn=load_sample_large, inputs=[], outputs=load_outputs)
# ---------- Statistics ----------
def show_stats(df, col_types):
    """Compute the summary tables and the correlation heatmap for ``df``.

    Returns four component updates, in order: numeric summary, categorical
    summary, missing-values report and correlation plot. Without a loaded
    dataset (``df`` or ``col_types`` is ``None``) all four are cleared and
    hidden. The plot is also hidden when no heatmap figure is produced.
    """
    if df is None or col_types is None:
        hidden_table = gr.update(value=None, visible=False)
        hidden_plot = gr.update(value=None, visible=False)
        return hidden_table, hidden_table, hidden_table, hidden_plot

    numeric_cols = col_types["numeric"]
    categorical_cols = col_types["categorical"]

    numeric_table = numeric_summary(df, numeric_cols)
    categorical_table = categorical_summary(df, categorical_cols)
    missing_table = missing_values_report(df)
    heatmap = create_correlation_heatmap(df, numeric_cols)

    table_updates = [
        gr.update(value=table, visible=True)
        for table in (numeric_table, categorical_table, missing_table)
    ]

    # The heatmap helper may return None; keep the plot hidden in that case.
    if heatmap is not None:
        plot_update = gr.update(value=heatmap, visible=True)
    else:
        plot_update = gr.update(value=None, visible=False)

    return table_updates[0], table_updates[1], table_updates[2], plot_update
# Statistics are computed from the loaded DataFrame and its column types.
stats_inputs = [df_state, col_types_state]
stats_outputs = [num_stats, cat_stats, missing_stats, corr_plot]
show_stats_btn.click(fn=show_stats, inputs=stats_inputs, outputs=stats_outputs)
# ---------- Filter metadata helpers ----------
def update_numeric_bounds(meta, col_name):
    """Fill the min/max inputs with the stored bounds of ``col_name``.

    Returns two updates (min, max). Both carry ``None`` when no metadata is
    available, no column is selected, or the column has no stored entry.
    """
    bounds = None
    if meta is not None and col_name:
        bounds = meta["numeric"].get(col_name)

    if bounds:
        lower, upper = bounds["min"], bounds["max"]
    else:
        lower, upper = None, None

    return gr.update(value=lower), gr.update(value=upper)
def update_categorical_values(meta, col_name):
    """Offer the stored values of ``col_name`` in the allowed-values picker.

    The selection is always reset to empty. The choices are empty as well
    when no metadata is available, no column is selected, or the column has
    no stored entry.
    """
    options = []
    if meta is not None and col_name:
        options = meta["categorical"].get(col_name, [])
    return gr.update(choices=options, value=[])
def update_date_bounds(meta, col_name):
    """Fill the start/end text boxes with the stored range of ``col_name``.

    Returns two updates (start, end). Both carry an empty string when no
    metadata is available, no column is selected, or the column has no
    stored entry.
    """
    span = None
    if meta is not None and col_name:
        span = meta["date"].get(col_name)

    if not span:
        return gr.update(value=""), gr.update(value="")

    # Keep only the text before the first space of each bound's string form.
    first_day = str(span["min"]).partition(" ")[0]
    last_day = str(span["max"]).partition(" ")[0]
    return gr.update(value=first_day), gr.update(value=last_day)
# Picking a filter column pre-fills the matching value widgets from the
# filter metadata.
filt_num_col.change(
    fn=update_numeric_bounds,
    inputs=[meta_state, filt_num_col],
    outputs=[filt_num_min, filt_num_max],
)
filt_cat_col.change(
    fn=update_categorical_values,
    inputs=[meta_state, filt_cat_col],
    outputs=[filt_cat_vals],
)
filt_date_col.change(
    fn=update_date_bounds,
    inputs=[meta_state, filt_date_col],
    outputs=[filt_date_start, filt_date_end],
)
# ---------- Apply filters ----------
def apply_filters_wrapper(
    df,
    num_col,
    num_min_val,
    num_max_val,
    cat_col,
    cat_vals,
    d_col,
    d_start,
    d_end,
):
    """Apply the filters chosen in the UI and preview the result.

    A filter is used only when it is fully specified: the numeric filter
    needs a column and both bounds, the categorical filter a column and at
    least one value, the date filter a column and both dates. Filters that
    are not fully specified are passed to ``apply_filters`` as ``None``.

    Returns an update for the preview table: hidden and empty when no
    dataset is loaded, otherwise visible with the first 50 filtered rows.
    """
    if df is None:
        return gr.update(value=None, visible=False)

    use_numeric = bool(num_col) and num_min_val is not None and num_max_val is not None
    use_categorical = bool(cat_col) and bool(cat_vals)
    use_date = bool(d_col) and bool(d_start) and bool(d_end)

    filtered = apply_filters(
        df,
        numeric_filters={num_col: [num_min_val, num_max_val]} if use_numeric else None,
        categorical_filters={cat_col: cat_vals} if use_categorical else None,
        date_filters={d_col: [d_start, d_end]} if use_date else None,
    )
    return gr.update(value=filtered.head(50), visible=True)
# The preview is computed from the loaded data plus every filter widget, in
# the parameter order of apply_filters_wrapper.
apply_filter_inputs = [
    df_state,
    filt_num_col,
    filt_num_min,
    filt_num_max,
    filt_cat_col,
    filt_cat_vals,
    filt_date_col,
    filt_date_start,
    filt_date_end,
]
apply_filters_btn.click(
    fn=apply_filters_wrapper,
    inputs=apply_filter_inputs,
    outputs=[filtered_preview],
)
# ---------- Export filtered data ----------
def export_filtered_data(
    df,
    num_col,
    num_min_val,
    num_max_val,
    cat_col,
    cat_vals,
    d_col,
    d_start,
    d_end,
):
    """Apply the filters chosen in the UI and write the result to a CSV file.

    A filter is used only when it is fully specified: the numeric filter
    needs a column and both bounds, the categorical filter a column and at
    least one value, the date filter a column and both dates.

    Returns:
        The path of the written CSV, or ``None`` when no dataset is loaded.
        Each call writes into its own freshly created temporary directory,
        so simultaneous exports cannot overwrite one another; the file
        itself is still named ``filtered_output.csv``.
    """
    import os
    import tempfile

    if df is None:
        return None

    numeric_filters = {}
    categorical_filters = {}
    date_filters = {}

    if num_col and num_min_val is not None and num_max_val is not None:
        numeric_filters[num_col] = [num_min_val, num_max_val]
    if cat_col and cat_vals:
        categorical_filters[cat_col] = cat_vals
    if d_col and d_start and d_end:
        date_filters[d_col] = [d_start, d_end]

    # Empty filter dicts are passed as None, i.e. "no filter of this kind".
    out_df = apply_filters(
        df,
        numeric_filters=numeric_filters or None,
        categorical_filters=categorical_filters or None,
        date_filters=date_filters or None,
    )

    # A fixed path in the working directory would be shared by every export;
    # a per-call directory keeps the exports separate.
    export_dir = tempfile.mkdtemp(prefix="filtered_export_")
    path = os.path.join(export_dir, "filtered_output.csv")
    out_df.to_csv(path, index=False)
    return path
# The export reads the same widgets as the preview and hands the written
# file to the download component.
export_inputs = [
    df_state,
    filt_num_col,
    filt_num_min,
    filt_num_max,
    filt_cat_col,
    filt_cat_vals,
    filt_date_col,
    filt_date_start,
    filt_date_end,
]
export_filtered_btn.click(
    fn=export_filtered_data,
    inputs=export_inputs,
    outputs=[filtered_download],
)
# ---------- Visualizations ----------
def make_chart(df, ctype, x, y, date_c, val, cat, agg, freq_val, col_types):
    """Build the requested chart and save a PNG copy for download.

    Args:
        df: The loaded dataset, or ``None`` when nothing is loaded.
        ctype: Chart kind selected in the UI.
        x, y: Columns for the scatter plot.
        date_c: Date column for the time series.
        val: Value column (time series, histogram, box plot, category bar).
        cat: Category column; required for the category bar, passed through
            to the time series and scatter helpers otherwise.
        agg: Aggregation name for the time series and category bar.
        freq_val: Resample frequency for the time series.
        col_types: Column types from the last load; detected from ``df``
            when ``None`` and a correlation heatmap is requested.

    Returns:
        ``(figure, png_path)``, or ``(None, None)`` when no dataset is
        loaded, the selections needed for ``ctype`` are missing, ``ctype``
        is not recognised, or the plotting helper returns no figure.
        Each PNG is written into its own freshly created temporary
        directory, so simultaneous requests cannot overwrite one another;
        the file itself is still named ``chart_output.png``.
    """
    import os
    import tempfile

    if df is None:
        return None, None

    fig = None
    if ctype == "Time Series" and date_c and val:
        fig = create_time_series_plot(
            df,
            date_col=date_c,
            value_col=val,
            agg_func=agg,
            freq=freq_val,
            category_col=cat,
        )
    elif ctype == "Histogram" and val:
        fig = create_distribution_plot(df, numeric_col=val, kind="hist")
    elif ctype == "Box Plot" and val:
        fig = create_distribution_plot(df, numeric_col=val, kind="box")
    elif ctype == "Category Bar" and cat:
        fig = create_category_bar_plot(
            df, category_col=cat, value_col=val, agg_func=agg
        )
    elif ctype == "Scatter" and x and y:
        fig = create_scatter_plot(df, x_col=x, y_col=y, category_col=cat)
    elif ctype == "Correlation Heatmap":
        if col_types is None:
            col_types = detect_column_types(df)
        fig = create_correlation_heatmap(df, col_types["numeric"])

    if fig is None:
        return None, None

    png_bytes = figure_to_png_bytes(fig)

    # A fixed path in the working directory would be shared by every chart
    # request; a per-call directory keeps the files separate.
    chart_dir = tempfile.mkdtemp(prefix="chart_export_")
    path = os.path.join(chart_dir, "chart_output.png")
    with open(path, "wb") as f:
        f.write(png_bytes)
    return fig, path
# Inputs follow the parameter order of make_chart; the outputs are the plot
# and the downloadable PNG.
chart_inputs = [
    df_state,
    chart_type,
    x_col,
    y_col,
    date_col,
    value_col,
    category_col,
    agg_func,
    freq,
    col_types_state,
]
viz_btn.click(
    fn=make_chart,
    inputs=chart_inputs,
    outputs=[viz_plot, viz_download],
)
# ---------- Insights ----------
def get_insights(df):
    """Produce the four insight tables and the trend summary text for ``df``.

    Returns five values, in order: updates for the region ranking, top
    models, bottom models and production-vs-deliveries tables, then the
    summary text. Without a loaded dataset the tables are cleared and hidden
    and the text asks the user to load data first.

    The column names ("Estimated_Deliveries", "Model", "Production_Units",
    "Date") are fixed here.
    NOTE(review): what happens for datasets lacking these columns depends on
    the insights helpers - confirm.
    """
    if df is None:
        hidden = gr.update(value=None, visible=False)
        return (
            hidden,
            hidden,
            hidden,
            hidden,
            "No data loaded. Please load a dataset in the Data Upload tab first.",
        )

    # Regions ranked by deliveries.
    ranking = region_ranking(df, value_col="Estimated_Deliveries")

    # Best and worst five models by deliveries.
    extremes = top_bottom_groups(df, "Model", "Estimated_Deliveries", top_n=5)
    best_models = extremes["top"]
    worst_models = extremes["bottom"]

    # Production compared with deliveries, per model.
    comparison = model_production_vs_delivery(
        df,
        model_col="Model",
        deliveries_col="Estimated_Deliveries",
        prod_col="Production_Units",
    )

    # Quarterly trend; only the first element of the helper's result is used.
    trend_info, _ = overall_trend_summary(
        df, "Date", "Estimated_Deliveries", freq="Q"
    )
    summary_text = dict_to_text(trend_info)

    table_updates = tuple(
        gr.update(value=table, visible=True)
        for table in (ranking, best_models, worst_models, comparison)
    )
    return (*table_updates, summary_text)
# Insights need only the loaded DataFrame; the outputs are the four tables
# followed by the summary text box.
insight_outputs = [
    region_rank_tb,
    model_top_tb,
    model_bottom_tb,
    model_comp_tb,
    trend_text,
]
insights_btn.click(fn=get_insights, inputs=[df_state], outputs=insight_outputs)
return demo
if __name__ == "__main__":
    # Run as a script: build the dashboard and start serving it.
    demo = create_dashboard()
    demo.launch()