import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd

from data_processor import (
    load_data,
    get_basic_info,
    detect_column_types,
    numeric_summary,
    categorical_summary,
    missing_values_report,
    correlation_matrix,
    build_filter_metadata,
    apply_filters,
)
from visualizations import (
    create_time_series_plot,
    create_distribution_plot,
    create_category_bar_plot,
    create_scatter_plot,
    create_correlation_heatmap,
)
from insights import (
    region_ranking,
    top_bottom_groups,
    model_production_vs_delivery,
    overall_trend_summary,
)
from utils import figure_to_png_bytes, dict_to_text


# Paths of the bundled sample datasets (relative to the working directory).
SAMPLE_SMALL_PATH = "data/tesla_deliveries_1k.csv"
SAMPLE_LARGE_PATH = "data/tesla_deliveries_50k.csv"


# -----------------------------
# Helper functions for loading
# -----------------------------
def load_any_source(file_or_path):
    """
    Common loader for both uploaded files and sample CSV paths.

    Args:
        file_or_path: Whatever ``load_data`` accepts; this module passes
            either the value of the upload component or a sample CSV path.

    Returns:
        A 6-tuple ``(df, meta, col_types, info_dict, preview_df,
        error_message)``. When ``load_data`` reports an error, the first five
        items are ``None`` and the last is the error. On success the error
        slot is ``None`` and ``preview_df`` holds the first 20 rows.
    """
    df, err = load_data(file_or_path)
    if err:
        return None, None, None, None, None, err

    info = get_basic_info(df)
    col_types = detect_column_types(df)
    meta = build_filter_metadata(df, col_types)
    preview = df.head(20)
    return df, meta, col_types, info, preview, None


# -----------------------------
# Helper functions for filtering / exporting
# -----------------------------
def _filter_dataframe(
    df,
    num_col,
    num_min_val,
    num_max_val,
    cat_col,
    cat_vals,
    d_col,
    d_start,
    d_end,
):
    """
    Translate the filter widgets' values into ``apply_filters`` arguments.

    A filter is only applied when it is completely specified: the numeric
    filter needs a column plus both bounds, the categorical filter needs a
    column plus at least one value, and the date filter needs a column plus
    both dates. Filter groups that end up empty are passed as ``None``.

    Shared by the preview and export callbacks so both always see the same
    rows.

    Returns:
        The result of ``apply_filters`` for ``df``.
    """
    numeric_filters = {}
    categorical_filters = {}
    date_filters = {}

    # 0 is a legitimate bound, so compare against None instead of truthiness.
    if num_col and num_min_val is not None and num_max_val is not None:
        numeric_filters[num_col] = [num_min_val, num_max_val]

    if cat_col and cat_vals:
        categorical_filters[cat_col] = cat_vals

    if d_col and d_start and d_end:
        date_filters[d_col] = [d_start, d_end]

    return apply_filters(
        df,
        numeric_filters=numeric_filters or None,
        categorical_filters=categorical_filters or None,
        date_filters=date_filters or None,
    )


def _fresh_output_path(filename):
    """
    Return a path for ``filename`` inside a newly created temporary directory.

    Each export gets its own directory so that concurrent sessions never
    overwrite each other's download, while the file keeps a readable name.
    The directory is not removed here; cleanup is left to the OS temp policy.
    """
    return str(Path(tempfile.mkdtemp(prefix="tesla_dashboard_")) / filename)


# ============================================================
# Gradio app
# ============================================================
def create_dashboard():
    """
    Build the dashboard UI and wire up all callbacks.

    Returns:
        The ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚗 Tesla Production & Deliveries Dashboard")
        gr.Markdown(
            "Upload data or use the built-in Tesla samples to explore statistics, "
            "filter data, visualize trends, and generate insights."
        )

        # Shared state across tabs
        df_state = gr.State(None)
        meta_state = gr.State(None)
        col_types_state = gr.State(None)

        # ----------------------------------------------------
        # 1. DATA UPLOAD TAB
        # ----------------------------------------------------
        with gr.Tab("Data Upload"):
            gr.Markdown("### Upload your own file or use one of the sample Tesla datasets")

            file_input = gr.File(label="Upload CSV or Excel File")
            load_btn = gr.Button("Load Uploaded Data")

            sample_small_btn = gr.Button("Load Tesla Sample (1K rows)")
            sample_large_btn = gr.Button("Load Tesla Sample (50K rows)")

            info_box = gr.JSON(label="Dataset Info")
            preview_box = gr.Dataframe(label="Preview (first 20 rows)", interactive=False)
            error_box = gr.Markdown(label="Messages")

        # ----------------------------------------------------
        # 2. STATISTICS TAB
        # ----------------------------------------------------
        with gr.Tab("Statistics"):
            gr.Markdown("### Summary Statistics")

            num_stats = gr.Dataframe(label="Numeric Summary", interactive=False, visible=False)
            cat_stats = gr.Dataframe(label="Categorical Summary", interactive=False, visible=False)
            missing_stats = gr.Dataframe(label="Missing Values", interactive=False, visible=False)
            corr_plot = gr.Plot(label="Correlation Heatmap", visible=False)

            show_stats_btn = gr.Button("Generate Statistics")

        # ----------------------------------------------------
        # 3. FILTER & EXPLORE TAB
        # ----------------------------------------------------
        with gr.Tab("Filter & Explore"):
            gr.Markdown("### Apply Filters to Explore the Dataset")

            # Numeric filter
            gr.Markdown("**Numeric Filter (optional)**")
            with gr.Row():
                filt_num_col = gr.Dropdown(label="Numeric Column", choices=[], interactive=True)
                filt_num_min = gr.Number(label="Min value")
                filt_num_max = gr.Number(label="Max value")

            # Categorical filter
            gr.Markdown("**Categorical Filter (optional)**")
            with gr.Row():
                filt_cat_col = gr.Dropdown(label="Categorical Column", choices=[], interactive=True)
                filt_cat_vals = gr.Dropdown(
                    label="Allowed Values", choices=[], multiselect=True, interactive=True
                )

            # Date filter
            gr.Markdown("**Date Filter (optional)**")
            with gr.Row():
                filt_date_col = gr.Dropdown(label="Date Column", choices=[], interactive=True)
                filt_date_start = gr.Textbox(label="Start date (YYYY-MM-DD)")
                filt_date_end = gr.Textbox(label="End date (YYYY-MM-DD)")

            apply_filters_btn = gr.Button("Apply Filters")
            filtered_preview = gr.Dataframe(
                label="Filtered Data Preview", interactive=False, visible=False
            )

            export_filtered_btn = gr.Button("Export Filtered Data")
            filtered_download = gr.File(label="Download CSV")

        # ----------------------------------------------------
        # 4. VISUALIZATIONS TAB
        # ----------------------------------------------------
        with gr.Tab("Visualizations"):
            gr.Markdown("### Create Charts")

            chart_type = gr.Dropdown(
                [
                    "Time Series",
                    "Histogram",
                    "Box Plot",
                    "Category Bar",
                    "Scatter",
                    "Correlation Heatmap",
                ],
                label="Chart Type",
                value="Time Series",
            )

            x_col = gr.Dropdown(label="X Column (for Scatter)", choices=[], interactive=True)
            y_col = gr.Dropdown(label="Y Column (for Scatter)", choices=[], interactive=True)
            date_col = gr.Dropdown(label="Date Column (for Time Series)", choices=[], interactive=True)
            value_col = gr.Dropdown(label="Value Column", choices=[], interactive=True)
            category_col = gr.Dropdown(
                label="Category Column (optional)", choices=[], interactive=True
            )

            agg_func = gr.Dropdown(
                ["sum", "mean", "count", "median"],
                label="Aggregation (for Time Series / Category Bar)",
                value="sum",
            )
            freq = gr.Dropdown(
                ["M", "Q", "Y"],
                label="Resample Frequency (Time Series)",
                value="Q",
            )

            viz_btn = gr.Button("Generate Chart")
            viz_plot = gr.Plot(label="Visualization")
            viz_download = gr.File(label="Download PNG")

        # ----------------------------------------------------
        # 5. INSIGHTS TAB
        # ----------------------------------------------------
        with gr.Tab("Insights"):
            gr.Markdown("### Automatic Insights")

            gr.Markdown("#### Region Ranking (by Estimated Deliveries)")
            region_rank_tb = gr.Dataframe(
                label="Regions ranked by total Estimated Deliveries",
                interactive=False,
                visible=False,
            )

            gr.Markdown("#### Top / Bottom Models (by Estimated Deliveries)")
            model_top_tb = gr.Dataframe(
                label="Top Models", interactive=False, visible=False
            )
            model_bottom_tb = gr.Dataframe(
                label="Bottom Models", interactive=False, visible=False
            )

            gr.Markdown("#### Model Production vs Estimated Deliveries")
            model_comp_tb = gr.Dataframe(
                label="Production vs Deliveries by Model",
                interactive=False,
                visible=False,
            )

            trend_text = gr.Textbox(
                label="Overall Trend Summary",
                lines=10,
                interactive=False,
                value="Click 'Generate Insights' after loading a dataset.",
            )

            insights_btn = gr.Button("Generate Insights")

        # =====================================================
        # CALLBACK IMPLEMENTATIONS
        # =====================================================

        # ---------- Load handlers ----------
        # Single source of truth for the components refreshed by every load
        # action. The tuples returned by handle_load must follow this order.
        load_outputs = [
            df_state,
            meta_state,
            col_types_state,
            info_box,
            preview_box,
            error_box,
            filt_num_col,
            filt_cat_col,
            filt_date_col,
            x_col,
            y_col,
            date_col,
            value_col,
            category_col,
        ]

        def handle_load(source):
            """Load ``source`` and return one value per entry of ``load_outputs``."""
            df, meta, col_types, info, preview, err = load_any_source(source)

            if err:
                # Clear states and dropdowns, show error
                empty_choices = gr.update(choices=[], value=None)
                return (
                    None,  # df_state
                    None,  # meta_state
                    None,  # col_types_state
                    {"error": err},  # info_box
                    None,  # preview_box
                    f"❌ {err}",  # error_box
                    empty_choices,  # filt_num_col
                    empty_choices,  # filt_cat_col
                    empty_choices,  # filt_date_col
                    empty_choices,  # x_col
                    empty_choices,  # y_col
                    empty_choices,  # date_col
                    empty_choices,  # value_col
                    empty_choices,  # category_col
                )

            # Normal case
            num_choice_update = gr.update(choices=col_types["numeric"], value=None)
            cat_choice_update = gr.update(choices=col_types["categorical"], value=None)
            date_choice_update = gr.update(choices=col_types["date"], value=None)

            return (
                df,  # df_state
                meta,  # meta_state
                col_types,  # col_types_state
                info,  # info_box
                preview,  # preview_box
                "✅ Data loaded successfully.",  # error_box
                num_choice_update,  # filt_num_col
                cat_choice_update,  # filt_cat_col
                date_choice_update,  # filt_date_col
                num_choice_update,  # x_col
                num_choice_update,  # y_col
                date_choice_update,  # date_col
                num_choice_update,  # value_col
                cat_choice_update,  # category_col
            )

        def load_sample_small():
            """Load the bundled 1K-row sample."""
            return handle_load(SAMPLE_SMALL_PATH)

        def load_sample_large():
            """Load the bundled 50K-row sample."""
            return handle_load(SAMPLE_LARGE_PATH)

        load_btn.click(handle_load, inputs=[file_input], outputs=load_outputs)
        sample_small_btn.click(load_sample_small, inputs=[], outputs=load_outputs)
        sample_large_btn.click(load_sample_large, inputs=[], outputs=load_outputs)

        # ---------- Statistics ----------
        def show_stats(df, col_types):
            """Return updates for the three summary tables and the heatmap."""
            if df is None or col_types is None:
                empty_df = gr.update(value=None, visible=False)
                empty_plot = gr.update(value=None, visible=False)
                return empty_df, empty_df, empty_df, empty_plot

            numeric_cols = col_types["numeric"]
            categorical_cols = col_types["categorical"]

            num_df = numeric_summary(df, numeric_cols)
            cat_df = categorical_summary(df, categorical_cols)
            miss_df = missing_values_report(df)
            fig = create_correlation_heatmap(df, numeric_cols)

            num_u = gr.update(value=num_df, visible=True)
            cat_u = gr.update(value=cat_df, visible=True)
            miss_u = gr.update(value=miss_df, visible=True)
            # Keep the plot hidden when no heatmap figure was produced.
            corr_u = (
                gr.update(value=fig, visible=True)
                if fig is not None
                else gr.update(value=None, visible=False)
            )

            return num_u, cat_u, miss_u, corr_u

        show_stats_btn.click(
            show_stats,
            inputs=[df_state, col_types_state],
            outputs=[num_stats, cat_stats, missing_stats, corr_plot],
        )

        # ---------- Filter metadata helpers ----------
        def update_numeric_bounds(meta, col_name):
            """Prefill min/max with the selected numeric column's bounds."""
            if meta is None or not col_name:
                return gr.update(value=None), gr.update(value=None)
            info = meta["numeric"].get(col_name)
            if not info:
                return gr.update(value=None), gr.update(value=None)
            return gr.update(value=info["min"]), gr.update(value=info["max"])

        def update_categorical_values(meta, col_name):
            """Offer the selected categorical column's values, none preselected."""
            if meta is None or not col_name:
                return gr.update(choices=[], value=[])
            values = meta["categorical"].get(col_name, [])
            return gr.update(choices=values, value=[])

        def update_date_bounds(meta, col_name):
            """Prefill start/end with the selected date column's bounds."""
            if meta is None or not col_name:
                return gr.update(value=""), gr.update(value="")
            info = meta["date"].get(col_name)
            if not info:
                return gr.update(value=""), gr.update(value="")
            # Keep only the text before the first space, i.e. drop a time part
            # if the string form has one.
            start = str(info["min"]).split(" ")[0]
            end = str(info["max"]).split(" ")[0]
            return gr.update(value=start), gr.update(value=end)

        filt_num_col.change(
            update_numeric_bounds,
            inputs=[meta_state, filt_num_col],
            outputs=[filt_num_min, filt_num_max],
        )

        filt_cat_col.change(
            update_categorical_values,
            inputs=[meta_state, filt_cat_col],
            outputs=[filt_cat_vals],
        )

        filt_date_col.change(
            update_date_bounds,
            inputs=[meta_state, filt_date_col],
            outputs=[filt_date_start, filt_date_end],
        )

        # Inputs shared by the preview and export callbacks; the order matches
        # the parameters of _filter_dataframe.
        filter_inputs = [
            df_state,
            filt_num_col,
            filt_num_min,
            filt_num_max,
            filt_cat_col,
            filt_cat_vals,
            filt_date_col,
            filt_date_start,
            filt_date_end,
        ]

        # ---------- Apply filters ----------
        def apply_filters_wrapper(
            df,
            num_col,
            num_min_val,
            num_max_val,
            cat_col,
            cat_vals,
            d_col,
            d_start,
            d_end,
        ):
            """Show the first 50 rows of the filtered data."""
            if df is None:
                return gr.update(value=None, visible=False)

            out_df = _filter_dataframe(
                df,
                num_col,
                num_min_val,
                num_max_val,
                cat_col,
                cat_vals,
                d_col,
                d_start,
                d_end,
            )
            return gr.update(value=out_df.head(50), visible=True)

        apply_filters_btn.click(
            apply_filters_wrapper,
            inputs=filter_inputs,
            outputs=[filtered_preview],
        )

        # ---------- Export filtered data ----------
        def export_filtered_data(
            df,
            num_col,
            num_min_val,
            num_max_val,
            cat_col,
            cat_vals,
            d_col,
            d_start,
            d_end,
        ):
            """Write the filtered data to a CSV and return its path."""
            if df is None:
                return None

            out_df = _filter_dataframe(
                df,
                num_col,
                num_min_val,
                num_max_val,
                cat_col,
                cat_vals,
                d_col,
                d_start,
                d_end,
            )

            # A per-call location instead of a fixed file in the working
            # directory: simultaneous exports must not clobber each other.
            path = _fresh_output_path("filtered_output.csv")
            out_df.to_csv(path, index=False)
            return path

        export_filtered_btn.click(
            export_filtered_data,
            inputs=filter_inputs,
            outputs=[filtered_download],
        )

        # ---------- Visualizations ----------
        def make_chart(df, ctype, x, y, date_c, val, cat, agg, freq_val, col_types):
            """
            Build the requested chart.

            Returns ``(figure, png_path)``, or ``(None, None)`` when no data is
            loaded, the columns required by the chart type are not selected,
            or the plotting helper returns no figure.
            """
            if df is None:
                return None, None

            fig = None

            if ctype == "Time Series" and date_c and val:
                fig = create_time_series_plot(
                    df,
                    date_col=date_c,
                    value_col=val,
                    agg_func=agg,
                    freq=freq_val,
                    category_col=cat,
                )
            elif ctype == "Histogram" and val:
                fig = create_distribution_plot(df, numeric_col=val, kind="hist")
            elif ctype == "Box Plot" and val:
                fig = create_distribution_plot(df, numeric_col=val, kind="box")
            elif ctype == "Category Bar" and cat:
                fig = create_category_bar_plot(
                    df, category_col=cat, value_col=val, agg_func=agg
                )
            elif ctype == "Scatter" and x and y:
                fig = create_scatter_plot(df, x_col=x, y_col=y, category_col=cat)
            elif ctype == "Correlation Heatmap":
                if col_types is None:
                    col_types = detect_column_types(df)
                fig = create_correlation_heatmap(df, col_types["numeric"])

            if fig is None:
                return None, None

            # Per-call location, for the same reason as the CSV export.
            path = _fresh_output_path("chart_output.png")
            Path(path).write_bytes(figure_to_png_bytes(fig))

            return fig, path

        viz_btn.click(
            make_chart,
            inputs=[
                df_state,
                chart_type,
                x_col,
                y_col,
                date_col,
                value_col,
                category_col,
                agg_func,
                freq,
                col_types_state,
            ],
            outputs=[viz_plot, viz_download],
        )

        # ---------- Insights ----------
        def get_insights(df):
            """Return updates for the four insight tables plus the trend text."""
            empty_df = gr.update(value=None, visible=False)

            if df is None:
                msg = "No data loaded. Please load a dataset in the Data Upload tab first."
                return empty_df, empty_df, empty_df, empty_df, msg

            # The helpers below are called with the Tesla samples' column
            # names, but users may upload any dataset. Report a failure in the
            # summary box instead of letting the callback error out.
            # NOTE(review): assumes the helpers signal a missing or unusable
            # column with KeyError / ValueError -- confirm against insights.py.
            try:
                # 1) Region ranking
                region_rank = region_ranking(df, value_col="Estimated_Deliveries")

                # 2) Top / Bottom models (non-overlapping)
                tb_models = top_bottom_groups(df, "Model", "Estimated_Deliveries", top_n=5)
                model_top = tb_models["top"]
                model_bottom = tb_models["bottom"]

                # 3) Production vs deliveries by model
                model_comp = model_production_vs_delivery(
                    df,
                    model_col="Model",
                    deliveries_col="Estimated_Deliveries",
                    prod_col="Production_Units",
                )

                # 4) Trend summary (quarterly)
                summary_dict, _ = overall_trend_summary(
                    df, "Date", "Estimated_Deliveries", freq="Q"
                )
                summary_text = dict_to_text(summary_dict)
            except (KeyError, ValueError) as exc:
                msg = (
                    "Could not generate insights for this dataset. Insights use "
                    "columns such as 'Model', 'Estimated_Deliveries', "
                    f"'Production_Units' and 'Date'. Details: {exc}"
                )
                return empty_df, empty_df, empty_df, empty_df, msg

            region_u = gr.update(value=region_rank, visible=True)
            model_top_u = gr.update(value=model_top, visible=True)
            model_bottom_u = gr.update(value=model_bottom, visible=True)
            model_comp_u = gr.update(value=model_comp, visible=True)

            return region_u, model_top_u, model_bottom_u, model_comp_u, summary_text

        insights_btn.click(
            get_insights,
            inputs=[df_state],
            outputs=[region_rank_tb, model_top_tb, model_bottom_tb, model_comp_tb, trend_text],
        )

    return demo


if __name__ == "__main__":
    demo = create_dashboard()
    demo.launch()