# Spaces: dkhangn / CS5130_finalProject (status: Sleeping)
# File size: 21,966 Bytes
# aa893a9 (presumably the commit hash of this revision)

import gradio as gr
import pandas as pd

from data_processor import (
    load_data,
    get_basic_info,
    detect_column_types,
    numeric_summary,
    categorical_summary,
    missing_values_report,
    correlation_matrix,
    build_filter_metadata,
    apply_filters,
)

from visualizations import (
    create_time_series_plot,
    create_distribution_plot,
    create_category_bar_plot,
    create_scatter_plot,
    create_correlation_heatmap,
)

from insights import (
    region_ranking,
    top_bottom_groups,
    model_production_vs_delivery,
    overall_trend_summary,
)

from utils import figure_to_png_bytes, dict_to_text


# -----------------------------
# Helper functions for loading
# -----------------------------
def load_any_source(file_or_path):
    """
    Load a dataset and derive everything the dashboard needs from it.

    Works for both uploaded files and sample CSV paths: the argument is
    handed to ``load_data`` unchanged.

    Returns:
        A 6-tuple ``(df, meta, col_types, info, preview, error)``.
        If ``load_data`` reports an error, the first five items are ``None``
        and ``error`` carries the message. Otherwise ``error`` is ``None``
        and ``preview`` holds the first 20 rows of ``df``.
    """
    frame, load_error = load_data(file_or_path)
    if load_error:
        # Nothing usable was loaded: five empty slots, then the message.
        return (None,) * 5 + (load_error,)

    basic_info = get_basic_info(frame)
    column_types = detect_column_types(frame)
    filter_meta = build_filter_metadata(frame, column_types)
    first_rows = frame.head(20)
    return frame, filter_meta, column_types, basic_info, first_rows, None


# ============================================================
# Gradio app
# ============================================================
def create_dashboard():
    """
    Build the Tesla Production & Deliveries dashboard.

    Lays out five tabs (Data Upload, Statistics, Filter & Explore,
    Visualizations, Insights) inside a single ``gr.Blocks`` container and
    registers the button/dropdown callbacks that connect them.

    Returns:
        The assembled ``gr.Blocks`` object; the caller is responsible for
        calling ``launch()`` on it.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚗 Tesla Production & Deliveries Dashboard")
        gr.Markdown(
            "Upload data or use the built-in Tesla samples to explore statistics, "
            "filter data, visualize trends, and generate insights."
        )

        # Shared state across tabs
        # df_state: the loaded DataFrame; meta_state: filter metadata;
        # col_types_state: detected column types. All three are written by
        # the load handlers and start out as None (nothing loaded yet).
        df_state = gr.State(None)
        meta_state = gr.State(None)
        col_types_state = gr.State(None)

        # ----------------------------------------------------
        # 1. DATA UPLOAD TAB
        # ----------------------------------------------------
        with gr.Tab("Data Upload"):
            gr.Markdown("### Upload your own file or use one of the sample Tesla datasets")

            file_input = gr.File(label="Upload CSV or Excel File")
            load_btn = gr.Button("Load Uploaded Data")

            sample_small_btn = gr.Button("Load Tesla Sample (1K rows)")
            sample_large_btn = gr.Button("Load Tesla Sample (50K rows)")

            info_box = gr.JSON(label="Dataset Info")
            preview_box = gr.Dataframe(label="Preview (first 20 rows)", interactive=False)
            error_box = gr.Markdown(label="Messages")

        # ----------------------------------------------------
        # 2. STATISTICS TAB
        # ----------------------------------------------------
        with gr.Tab("Statistics"):
            gr.Markdown("### Summary Statistics")

            # Result widgets are created hidden; the statistics callback
            # makes them visible once it has something to show.
            num_stats = gr.Dataframe(label="Numeric Summary", interactive=False, visible=False)
            cat_stats = gr.Dataframe(label="Categorical Summary", interactive=False, visible=False)
            missing_stats = gr.Dataframe(label="Missing Values", interactive=False, visible=False)
            corr_plot = gr.Plot(label="Correlation Heatmap", visible=False)

            show_stats_btn = gr.Button("Generate Statistics")

        # ----------------------------------------------------
        # 3. FILTER & EXPLORE TAB
        # ----------------------------------------------------
        with gr.Tab("Filter & Explore"):
            gr.Markdown("### Apply Filters to Explore the Dataset")

            # All column dropdowns start with no choices; the load handlers
            # fill them in from the detected column types.

            # Numeric filter
            gr.Markdown("**Numeric Filter (optional)**")
            with gr.Row():
                filt_num_col = gr.Dropdown(label="Numeric Column", choices=[], interactive=True)
                filt_num_min = gr.Number(label="Min value")
                filt_num_max = gr.Number(label="Max value")

            # Categorical filter
            gr.Markdown("**Categorical Filter (optional)**")
            with gr.Row():
                filt_cat_col = gr.Dropdown(label="Categorical Column", choices=[], interactive=True)
                filt_cat_vals = gr.Dropdown(
                    label="Allowed Values", choices=[], multiselect=True, interactive=True
                )

            # Date filter
            gr.Markdown("**Date Filter (optional)**")
            with gr.Row():
                filt_date_col = gr.Dropdown(label="Date Column", choices=[], interactive=True)
                filt_date_start = gr.Textbox(label="Start date (YYYY-MM-DD)")
                filt_date_end = gr.Textbox(label="End date (YYYY-MM-DD)")

            apply_filters_btn = gr.Button("Apply Filters")

            filtered_preview = gr.Dataframe(
                label="Filtered Data Preview", interactive=False, visible=False
            )

            export_filtered_btn = gr.Button("Export Filtered Data")
            filtered_download = gr.File(label="Download CSV")

        # ----------------------------------------------------
        # 4. VISUALIZATIONS TAB
        # ----------------------------------------------------
        with gr.Tab("Visualizations"):
            gr.Markdown("### Create Charts")

            chart_type = gr.Dropdown(
                [
                    "Time Series",
                    "Histogram",
                    "Box Plot",
                    "Category Bar",
                    "Scatter",
                    "Correlation Heatmap",
                ],
                label="Chart Type",
                value="Time Series",
            )

            # Every column selector is shown regardless of chart type; the
            # chart callback reads only the ones its chart type needs.
            x_col = gr.Dropdown(label="X Column (for Scatter)", choices=[], interactive=True)
            y_col = gr.Dropdown(label="Y Column (for Scatter)", choices=[], interactive=True)
            date_col = gr.Dropdown(label="Date Column (for Time Series)", choices=[], interactive=True)
            value_col = gr.Dropdown(label="Value Column", choices=[], interactive=True)
            category_col = gr.Dropdown(
                label="Category Column (optional)", choices=[], interactive=True
            )

            agg_func = gr.Dropdown(
                ["sum", "mean", "count", "median"],
                label="Aggregation (for Time Series / Category Bar)",
                value="sum",
            )
            freq = gr.Dropdown(
                ["M", "Q", "Y"],
                label="Resample Frequency (Time Series)",
                value="Q",
            )

            viz_btn = gr.Button("Generate Chart")
            viz_plot = gr.Plot(label="Visualization")
            viz_download = gr.File(label="Download PNG")

        # ----------------------------------------------------
        # 5. INSIGHTS TAB
        # ----------------------------------------------------
        with gr.Tab("Insights"):
            gr.Markdown("### Automatic Insights")

            # The insight tables start hidden, like the statistics widgets.
            gr.Markdown("#### Region Ranking (by Estimated Deliveries)")
            region_rank_tb = gr.Dataframe(
                label="Regions ranked by total Estimated Deliveries",
                interactive=False,
                visible=False,
            )

            gr.Markdown("#### Top / Bottom Models (by Estimated Deliveries)")
            model_top_tb = gr.Dataframe(
                label="Top Models", interactive=False, visible=False
            )
            model_bottom_tb = gr.Dataframe(
                label="Bottom Models", interactive=False, visible=False
            )

            gr.Markdown("#### Model Production vs Estimated Deliveries")
            model_comp_tb = gr.Dataframe(
                label="Production vs Deliveries by Model",
                interactive=False,
                visible=False,
            )

            trend_text = gr.Textbox(
                label="Overall Trend Summary",
                lines=10,
                interactive=False,
                value="Click 'Generate Insights' after loading a dataset.",
            )

            insights_btn = gr.Button("Generate Insights")
        # =====================================================
        # CALLBACK IMPLEMENTATIONS
        # =====================================================

        # ---------- Load handlers ----------
        def handle_load(source):
            """
            Load ``source`` and produce one value per component in ``load_outputs``.

            Args:
                source: What to load -- the value of the upload widget or a
                    sample CSV path. It is passed straight to
                    ``load_any_source``.

            Returns:
                A 14-tuple ordered exactly like ``load_outputs``: the three
                shared states, the dataset info, the preview table, the
                status message, then choice updates for the eight column
                dropdowns. On failure the states are reset to ``None`` and
                every dropdown is emptied.
            """
            df, meta, col_types, info, preview, err = load_any_source(source)
            if err:
                # Clear states and dropdowns, show error
                empty_choices = gr.update(choices=[], value=None)

                return (
                    None,  # df_state
                    None,  # meta_state
                    None,  # col_types_state
                    {"error": err},  # info_box
                    None,  # preview_box
                    f"❌ {err}",  # error_box
                    empty_choices,  # filt_num_col
                    empty_choices,  # filt_cat_col
                    empty_choices,  # filt_date_col
                    empty_choices,  # x_col
                    empty_choices,  # y_col
                    empty_choices,  # date_col
                    empty_choices,  # value_col
                    empty_choices,  # category_col
                )

            # Normal case: offer each dropdown the columns of the matching kind
            # and reset any previous selection.
            num_choice_update = gr.update(choices=col_types["numeric"], value=None)
            cat_choice_update = gr.update(choices=col_types["categorical"], value=None)
            date_choice_update = gr.update(choices=col_types["date"], value=None)

            return (
                df,       # df_state
                meta,     # meta_state
                col_types,  # col_types_state
                info,     # info_box
                preview,  # preview_box
                "✅ Data loaded successfully.",  # error_box
                num_choice_update,   # filt_num_col
                cat_choice_update,   # filt_cat_col
                date_choice_update,  # filt_date_col
                num_choice_update,   # x_col
                num_choice_update,   # y_col
                date_choice_update,  # date_col
                num_choice_update,   # value_col
                cat_choice_update,   # category_col
            )

        # Single source of truth for the components every loader refreshes.
        # The order must match the tuples returned by handle_load, so it is
        # defined once instead of being repeated for each button.
        load_outputs = [
            df_state,
            meta_state,
            col_types_state,
            info_box,
            preview_box,
            error_box,
            filt_num_col,
            filt_cat_col,
            filt_date_col,
            x_col,
            y_col,
            date_col,
            value_col,
            category_col,
        ]

        load_btn.click(handle_load, inputs=[file_input], outputs=load_outputs)

        def load_sample_small():
            """Load the bundled 1K-row Tesla sample CSV."""
            return handle_load("data/tesla_deliveries_1k.csv")

        def load_sample_large():
            """Load the bundled 50K-row Tesla sample CSV."""
            return handle_load("data/tesla_deliveries_50k.csv")

        sample_small_btn.click(load_sample_small, inputs=[], outputs=load_outputs)
        sample_large_btn.click(load_sample_large, inputs=[], outputs=load_outputs)

        # ---------- Statistics ----------
        def show_stats(df, col_types):
            """
            Compute the summary tables and correlation heatmap for the Statistics tab.

            Returns four component updates, in order: numeric summary,
            categorical summary, missing-values report, correlation heatmap.
            All four are hidden when no dataset (or no column-type info) is
            available; the heatmap alone is hidden when no figure is produced.
            """
            if df is None or col_types is None:
                hidden = gr.update(value=None, visible=False)
                return hidden, hidden, hidden, hidden

            def shown(value):
                # Reveal a component and give it its content in one update.
                return gr.update(value=value, visible=True)

            numeric_cols = col_types["numeric"]
            categorical_cols = col_types["categorical"]

            numeric_table = numeric_summary(df, numeric_cols)
            categorical_table = categorical_summary(df, categorical_cols)
            missing_table = missing_values_report(df)
            heatmap = create_correlation_heatmap(df, numeric_cols)

            if heatmap is None:
                heatmap_update = gr.update(value=None, visible=False)
            else:
                heatmap_update = shown(heatmap)

            return (
                shown(numeric_table),
                shown(categorical_table),
                shown(missing_table),
                heatmap_update,
            )

        show_stats_btn.click(
            show_stats,
            inputs=[df_state, col_types_state],
            outputs=[num_stats, cat_stats, missing_stats, corr_plot],
        )

        # ---------- Filter metadata helpers ----------
        def update_numeric_bounds(meta, col_name):
            """
            Prefill the min/max number boxes from the metadata of ``col_name``.

            Returns a pair of updates (min box, max box). Both boxes are
            cleared when no metadata is loaded, no column is selected, or the
            column has no entry under ``meta["numeric"]``.
            """
            bounds = None
            if meta is not None and col_name:
                bounds = meta["numeric"].get(col_name)

            if not bounds:
                return gr.update(value=None), gr.update(value=None)
            return gr.update(value=bounds["min"]), gr.update(value=bounds["max"])

        def update_categorical_values(meta, col_name):
            """
            Refresh the "Allowed Values" choices for the selected categorical column.

            The current selection is always reset to an empty list. The
            choices are empty when no metadata is loaded, no column is
            selected, or the column is missing from ``meta["categorical"]``.
            """
            allowed = []
            if meta is not None and col_name:
                allowed = meta["categorical"].get(col_name, [])
            return gr.update(choices=allowed, value=[])

        def update_date_bounds(meta, col_name):
            """
            Prefill the start/end date text boxes from the metadata of ``col_name``.

            Returns a pair of updates (start box, end box). Both are blanked
            when no metadata is loaded, no column is selected, or the column
            has no entry under ``meta["date"]``.
            """
            span = None
            if meta is not None and col_name:
                span = meta["date"].get(col_name)

            if not span:
                return gr.update(value=""), gr.update(value="")

            # Keep only the text before the first space of each bound's
            # string form (drops a trailing time component, if any).
            first_day, last_day = (
                str(span[key]).split(" ")[0] for key in ("min", "max")
            )
            return gr.update(value=first_day), gr.update(value=last_day)

        # Each filter column dropdown refreshes its companion widgets whenever
        # its selection changes: (dropdown, callback, widgets to refresh).
        filter_refresh_table = (
            (filt_num_col, update_numeric_bounds, [filt_num_min, filt_num_max]),
            (filt_cat_col, update_categorical_values, [filt_cat_vals]),
            (filt_date_col, update_date_bounds, [filt_date_start, filt_date_end]),
        )
        for column_dropdown, refresh_fn, refreshed in filter_refresh_table:
            column_dropdown.change(
                refresh_fn,
                inputs=[meta_state, column_dropdown],
                outputs=refreshed,
            )

        # ---------- Apply filters ----------
        def _filter_frame(
            df,
            num_col,
            num_min_val,
            num_max_val,
            cat_col,
            cat_vals,
            d_col,
            d_start,
            d_end,
        ):
            """
            Turn the filter widgets' values into ``apply_filters`` arguments and run it.

            A filter is only included when its widgets are completely filled
            in: a numeric column with both bounds, a categorical column with
            at least one allowed value, a date column with both dates. Filter
            kinds with nothing to apply are passed as ``None``.

            Returns whatever ``apply_filters`` returns for ``df``.
            """
            numeric_filters = {}
            categorical_filters = {}
            date_filters = {}

            # Explicit None checks: 0 is a legitimate numeric bound.
            if num_col and num_min_val is not None and num_max_val is not None:
                numeric_filters[num_col] = [num_min_val, num_max_val]

            if cat_col and cat_vals:
                categorical_filters[cat_col] = cat_vals

            if d_col and d_start and d_end:
                date_filters[d_col] = [d_start, d_end]

            return apply_filters(
                df,
                numeric_filters=numeric_filters or None,
                categorical_filters=categorical_filters or None,
                date_filters=date_filters or None,
            )

        def apply_filters_wrapper(
            df,
            num_col,
            num_min_val,
            num_max_val,
            cat_col,
            cat_vals,
            d_col,
            d_start,
            d_end,
        ):
            """
            Show the first 50 rows of the filtered dataset in the preview table.

            Returns an update that hides the preview when no dataset is
            loaded, and otherwise reveals it with the filtered rows.
            """
            if df is None:
                return gr.update(value=None, visible=False)

            out_df = _filter_frame(
                df,
                num_col,
                num_min_val,
                num_max_val,
                cat_col,
                cat_vals,
                d_col,
                d_start,
                d_end,
            )
            return gr.update(value=out_df.head(50), visible=True)

        # Preview and export read the same widgets, in the order both
        # callbacks expect their arguments.
        filter_inputs = [
            df_state,
            filt_num_col,
            filt_num_min,
            filt_num_max,
            filt_cat_col,
            filt_cat_vals,
            filt_date_col,
            filt_date_start,
            filt_date_end,
        ]

        apply_filters_btn.click(
            apply_filters_wrapper,
            inputs=filter_inputs,
            outputs=[filtered_preview],
        )

        # ---------- Export filtered data ----------
        def export_filtered_data(
            df,
            num_col,
            num_min_val,
            num_max_val,
            cat_col,
            cat_vals,
            d_col,
            d_start,
            d_end,
        ):
            """
            Write the filtered dataset to a CSV file and return its path.

            Returns ``None`` when no dataset is loaded. Otherwise returns the
            path of a freshly created temporary ``.csv`` file containing all
            filtered rows (no index column).
            """
            if df is None:
                return None

            # Function-scope import keeps this block self-contained.
            import tempfile

            out_df = _filter_frame(
                df,
                num_col,
                num_min_val,
                num_max_val,
                cat_col,
                cat_vals,
                d_col,
                d_start,
                d_end,
            )

            # A unique file per export: a fixed name in the working directory
            # would let concurrent sessions overwrite each other's download.
            # delete=False because the file must outlive this function so it
            # can be served; the temp file is not removed by this code.
            with tempfile.NamedTemporaryFile(
                prefix="filtered_output_", suffix=".csv", delete=False
            ) as tmp:
                path = tmp.name
            out_df.to_csv(path, index=False)
            return path

        export_filtered_btn.click(
            export_filtered_data,
            inputs=filter_inputs,
            outputs=[filtered_download],
        )

        # ---------- Visualizations ----------
        def make_chart(df, ctype, x, y, date_c, val, cat, agg, freq_val, col_types):
            """
            Build the chart selected in the Visualizations tab and save it as a PNG.

            Args:
                df: Loaded dataset, or ``None`` when nothing is loaded.
                ctype: Chart type label chosen in the dropdown.
                x, y: Columns for the scatter plot.
                date_c: Date column for the time series.
                val: Value column (time series, histogram, box plot, category bar).
                cat: Category column; required for "Category Bar", passed
                    through to the time series and scatter plots.
                agg: Aggregation name for time series / category bar.
                freq_val: Resample frequency for the time series.
                col_types: Detected column types; re-detected from ``df`` for
                    the correlation heatmap if ``None``.

            Returns:
                ``(figure, png_path)``, or ``(None, None)`` when no dataset is
                loaded, the selections needed by the chart type are missing,
                or the plotting helper returns no figure.
            """
            if df is None:
                return None, None

            fig = None

            if ctype == "Time Series" and date_c and val:
                fig = create_time_series_plot(
                    df,
                    date_col=date_c,
                    value_col=val,
                    agg_func=agg,
                    freq=freq_val,
                    category_col=cat,
                )
            elif ctype == "Histogram" and val:
                fig = create_distribution_plot(df, numeric_col=val, kind="hist")
            elif ctype == "Box Plot" and val:
                fig = create_distribution_plot(df, numeric_col=val, kind="box")
            elif ctype == "Category Bar" and cat:
                fig = create_category_bar_plot(
                    df, category_col=cat, value_col=val, agg_func=agg
                )
            elif ctype == "Scatter" and x and y:
                fig = create_scatter_plot(df, x_col=x, y_col=y, category_col=cat)
            elif ctype == "Correlation Heatmap":
                if col_types is None:
                    col_types = detect_column_types(df)
                numeric_cols = col_types["numeric"]
                fig = create_correlation_heatmap(df, numeric_cols)

            if fig is None:
                return None, None

            # Function-scope import keeps this block self-contained.
            import tempfile

            png_bytes = figure_to_png_bytes(fig)

            # A unique file per chart: a fixed name in the working directory
            # would let concurrent sessions overwrite each other's download.
            # delete=False because the file must outlive this function so it
            # can be served; the temp file is not removed by this code.
            with tempfile.NamedTemporaryFile(
                prefix="chart_output_", suffix=".png", delete=False
            ) as tmp:
                tmp.write(png_bytes)
                path = tmp.name

            return fig, path

        viz_btn.click(
            make_chart,
            inputs=[
                df_state,
                chart_type,
                x_col,
                y_col,
                date_col,
                value_col,
                category_col,
                agg_func,
                freq,
                col_types_state,
            ],
            outputs=[viz_plot, viz_download],
        )

        # ---------- Insights ----------
        def get_insights(df):
            if df is None:
                empty_df = gr.update(value=None, visible=False)
                msg = "No data loaded. Please load a dataset in the Data Upload tab first."
                return empty_df, empty_df, empty_df, empty_df, msg

            # 1) Region ranking
            region_rank = region_ranking(df, value_col="Estimated_Deliveries")

            # 2) Top / Bottom models (non-overlapping)
            tb_models = top_bottom_groups(df, "Model", "Estimated_Deliveries", top_n=5)
            model_top = tb_models["top"]
            model_bottom = tb_models["bottom"]

            # 3) Production vs deliveries by model
            model_comp = model_production_vs_delivery(
                df,
                model_col="Model",
                deliveries_col="Estimated_Deliveries",
                prod_col="Production_Units",
            )

            # 4) Trend summary (quarterly)
            summary_dict, _ = overall_trend_summary(
                df, "Date", "Estimated_Deliveries", freq="Q"
            )
            summary_text = dict_to_text(summary_dict)

            region_u = gr.update(value=region_rank, visible=True)
            model_top_u = gr.update(value=model_top, visible=True)
            model_bottom_u = gr.update(value=model_bottom, visible=True)
            model_comp_u = gr.update(value=model_comp, visible=True)

            return region_u, model_top_u, model_bottom_u, model_comp_u, summary_text

        insights_btn.click(
            get_insights,
            inputs=[df_state],
            outputs=[region_rank_tb, model_top_tb, model_bottom_tb, model_comp_tb, trend_text],
        )

    return demo


# Script entry point: build the dashboard and launch it. Nothing is built
# or launched when this module is merely imported.
if __name__ == "__main__":
    demo = create_dashboard()
    demo.launch()