Spaces:

dkhangn
/

CS5130_finalProject

Sleeping

Khang Nguyen

initial commit

aa893a9 about 2 months ago

22 kB

	import gradio as gr
	import pandas as pd

	from data_processor import (
	load_data,
	get_basic_info,
	detect_column_types,
	numeric_summary,
	categorical_summary,
	missing_values_report,
	correlation_matrix,
	build_filter_metadata,
	apply_filters,
	)

	from visualizations import (
	create_time_series_plot,
	create_distribution_plot,
	create_category_bar_plot,
	create_scatter_plot,
	create_correlation_heatmap,
	)

	from insights import (
	region_ranking,
	top_bottom_groups,
	model_production_vs_delivery,
	overall_trend_summary,
	)

	from utils import figure_to_png_bytes, dict_to_text


	# -----------------------------
	# Helper functions for loading
	# -----------------------------
	def load_any_source(file_or_path):
	    """Load a dataset and derive everything the dashboard needs from it.

	    Shared by the upload handler and the sample-dataset buttons.

	    Args:
	        file_or_path: Passed unchanged to ``load_data``.

	    Returns:
	        A 6-tuple ``(df, meta, col_types, info, preview, error)``. When
	        ``load_data`` reports an error, the first five items are ``None`` and
	        ``error`` carries that value. Otherwise ``error`` is ``None`` and
	        ``preview`` is the first 20 rows of ``df``.
	    """
	    frame, failure = load_data(file_or_path)
	    if failure:
	        return (None,) * 5 + (failure,)

	    summary = get_basic_info(frame)
	    column_kinds = detect_column_types(frame)
	    return (
	        frame,
	        build_filter_metadata(frame, column_kinds),
	        column_kinds,
	        summary,
	        frame.head(20),
	        None,
	    )


	# ============================================================
	# Gradio app
	# ============================================================
	def _fresh_output_path(filename):
	    """Return ``filename`` placed inside a brand-new temporary directory.

	    A fixed path in the working directory is shared by every visitor of the
	    app, so two exports could overwrite each other. A per-call directory
	    avoids that while keeping the download's file name unchanged.
	    """
	    import os
	    import tempfile

	    return os.path.join(tempfile.mkdtemp(prefix="tesla_dashboard_"), filename)


	def _collect_filters(
	    num_col, num_min_val, num_max_val, cat_col, cat_vals, d_col, d_start, d_end
	):
	    """Turn the filter widgets' values into keyword arguments for ``apply_filters``.

	    A filter is only included when its column and every one of its
	    bounds/values are set; a filter group with nothing selected is passed as
	    ``None``.
	    """
	    numeric_filters = {}
	    categorical_filters = {}
	    date_filters = {}

	    # 0 is a legitimate bound, so compare against None rather than truthiness.
	    if num_col and num_min_val is not None and num_max_val is not None:
	        numeric_filters[num_col] = [num_min_val, num_max_val]
	    if cat_col and cat_vals:
	        categorical_filters[cat_col] = cat_vals
	    if d_col and d_start and d_end:
	        date_filters[d_col] = [d_start, d_end]

	    return {
	        "numeric_filters": numeric_filters or None,
	        "categorical_filters": categorical_filters or None,
	        "date_filters": date_filters or None,
	    }


	def create_dashboard():
	    """Build the Tesla dashboard UI and wire up all of its callbacks.

	    The app has five tabs (Data Upload, Statistics, Filter & Explore,
	    Visualizations, Insights) that share the loaded DataFrame, its filter
	    metadata and its column-type mapping through ``gr.State`` objects.

	    Returns:
	        The ``gr.Blocks`` app; the caller is responsible for launching it.
	    """
	    with gr.Blocks(theme=gr.themes.Soft()) as demo:
	        gr.Markdown("# 🚗 Tesla Production & Deliveries Dashboard")
	        gr.Markdown(
	            "Upload data or use the built-in Tesla samples to explore statistics, "
	            "filter data, visualize trends, and generate insights."
	        )

	        # Shared state across tabs
	        df_state = gr.State(None)
	        meta_state = gr.State(None)
	        col_types_state = gr.State(None)

	        # ----------------------------------------------------
	        # 1. DATA UPLOAD TAB
	        # ----------------------------------------------------
	        with gr.Tab("Data Upload"):
	            gr.Markdown("### Upload your own file or use one of the sample Tesla datasets")

	            file_input = gr.File(label="Upload CSV or Excel File")
	            load_btn = gr.Button("Load Uploaded Data")

	            sample_small_btn = gr.Button("Load Tesla Sample (1K rows)")
	            sample_large_btn = gr.Button("Load Tesla Sample (50K rows)")

	            info_box = gr.JSON(label="Dataset Info")
	            preview_box = gr.Dataframe(label="Preview (first 20 rows)", interactive=False)
	            error_box = gr.Markdown(label="Messages")

	        # ----------------------------------------------------
	        # 2. STATISTICS TAB
	        # ----------------------------------------------------
	        with gr.Tab("Statistics"):
	            gr.Markdown("### Summary Statistics")

	            # Hidden until "Generate Statistics" produces something to show.
	            num_stats = gr.Dataframe(label="Numeric Summary", interactive=False, visible=False)
	            cat_stats = gr.Dataframe(
	                label="Categorical Summary", interactive=False, visible=False
	            )
	            missing_stats = gr.Dataframe(
	                label="Missing Values", interactive=False, visible=False
	            )
	            corr_plot = gr.Plot(label="Correlation Heatmap", visible=False)

	            show_stats_btn = gr.Button("Generate Statistics")

	        # ----------------------------------------------------
	        # 3. FILTER & EXPLORE TAB
	        # ----------------------------------------------------
	        with gr.Tab("Filter & Explore"):
	            gr.Markdown("### Apply Filters to Explore the Dataset")

	            # Numeric filter
	            gr.Markdown("Numeric Filter (optional)")
	            with gr.Row():
	                filt_num_col = gr.Dropdown(label="Numeric Column", choices=[], interactive=True)
	                filt_num_min = gr.Number(label="Min value")
	                filt_num_max = gr.Number(label="Max value")

	            # Categorical filter
	            gr.Markdown("Categorical Filter (optional)")
	            with gr.Row():
	                filt_cat_col = gr.Dropdown(
	                    label="Categorical Column", choices=[], interactive=True
	                )
	                filt_cat_vals = gr.Dropdown(
	                    label="Allowed Values", choices=[], multiselect=True, interactive=True
	                )

	            # Date filter
	            gr.Markdown("Date Filter (optional)")
	            with gr.Row():
	                filt_date_col = gr.Dropdown(label="Date Column", choices=[], interactive=True)
	                filt_date_start = gr.Textbox(label="Start date (YYYY-MM-DD)")
	                filt_date_end = gr.Textbox(label="End date (YYYY-MM-DD)")

	            apply_filters_btn = gr.Button("Apply Filters")

	            filtered_preview = gr.Dataframe(
	                label="Filtered Data Preview", interactive=False, visible=False
	            )

	            export_filtered_btn = gr.Button("Export Filtered Data")
	            filtered_download = gr.File(label="Download CSV")

	        # ----------------------------------------------------
	        # 4. VISUALIZATIONS TAB
	        # ----------------------------------------------------
	        with gr.Tab("Visualizations"):
	            gr.Markdown("### Create Charts")

	            chart_type = gr.Dropdown(
	                [
	                    "Time Series",
	                    "Histogram",
	                    "Box Plot",
	                    "Category Bar",
	                    "Scatter",
	                    "Correlation Heatmap",
	                ],
	                label="Chart Type",
	                value="Time Series",
	            )

	            x_col = gr.Dropdown(label="X Column (for Scatter)", choices=[], interactive=True)
	            y_col = gr.Dropdown(label="Y Column (for Scatter)", choices=[], interactive=True)
	            date_col = gr.Dropdown(
	                label="Date Column (for Time Series)", choices=[], interactive=True
	            )
	            value_col = gr.Dropdown(label="Value Column", choices=[], interactive=True)
	            category_col = gr.Dropdown(
	                label="Category Column (optional)", choices=[], interactive=True
	            )

	            agg_func = gr.Dropdown(
	                ["sum", "mean", "count", "median"],
	                label="Aggregation (for Time Series / Category Bar)",
	                value="sum",
	            )
	            freq = gr.Dropdown(
	                ["M", "Q", "Y"],
	                label="Resample Frequency (Time Series)",
	                value="Q",
	            )

	            viz_btn = gr.Button("Generate Chart")
	            viz_plot = gr.Plot(label="Visualization")
	            viz_download = gr.File(label="Download PNG")

	        # ----------------------------------------------------
	        # 5. INSIGHTS TAB
	        # ----------------------------------------------------
	        with gr.Tab("Insights"):
	            gr.Markdown("### Automatic Insights")

	            gr.Markdown("#### Region Ranking (by Estimated Deliveries)")
	            region_rank_tb = gr.Dataframe(
	                label="Regions ranked by total Estimated Deliveries",
	                interactive=False,
	                visible=False,
	            )

	            gr.Markdown("#### Top / Bottom Models (by Estimated Deliveries)")
	            model_top_tb = gr.Dataframe(label="Top Models", interactive=False, visible=False)
	            model_bottom_tb = gr.Dataframe(
	                label="Bottom Models", interactive=False, visible=False
	            )

	            gr.Markdown("#### Model Production vs Estimated Deliveries")
	            model_comp_tb = gr.Dataframe(
	                label="Production vs Deliveries by Model",
	                interactive=False,
	                visible=False,
	            )

	            trend_text = gr.Textbox(
	                label="Overall Trend Summary",
	                lines=10,
	                interactive=False,
	                value="Click 'Generate Insights' after loading a dataset.",
	            )

	            insights_btn = gr.Button("Generate Insights")

	        # =====================================================
	        # CALLBACK IMPLEMENTATIONS
	        # =====================================================

	        # ---------- Load handlers ----------
	        # Every dropdown that is repopulated on load, paired with the
	        # ``col_types`` key that supplies its choices. Both the outputs list
	        # and the handler's return value are derived from this one table, so
	        # they cannot drift out of order.
	        column_dropdowns = [
	            (filt_num_col, "numeric"),
	            (filt_cat_col, "categorical"),
	            (filt_date_col, "date"),
	            (x_col, "numeric"),
	            (y_col, "numeric"),
	            (date_col, "date"),
	            (value_col, "numeric"),
	            (category_col, "categorical"),
	        ]
	        load_outputs = [
	            df_state,
	            meta_state,
	            col_types_state,
	            info_box,
	            preview_box,
	            error_box,
	            *(dropdown for dropdown, _ in column_dropdowns),
	        ]

	        def handle_load(source):
	            """Load ``source`` and return one value per entry of ``load_outputs``."""
	            df, meta, col_types, info, preview, err = load_any_source(source)
	            if err:
	                # Drop any previously loaded data and empty every column picker.
	                cleared = gr.update(choices=[], value=None)
	                return (
	                    None,  # df_state
	                    None,  # meta_state
	                    None,  # col_types_state
	                    {"error": err},  # info_box
	                    None,  # preview_box
	                    f"❌ {err}",  # error_box
	                    *(cleared for _ in column_dropdowns),
	                )

	            return (
	                df,  # df_state
	                meta,  # meta_state
	                col_types,  # col_types_state
	                info,  # info_box
	                preview,  # preview_box
	                "✅ Data loaded successfully.",  # error_box
	                *(
	                    gr.update(choices=col_types[kind], value=None)
	                    for _, kind in column_dropdowns
	                ),
	            )

	        def load_sample_small():
	            """Load the bundled 1K-row sample."""
	            return handle_load("data/tesla_deliveries_1k.csv")

	        def load_sample_large():
	            """Load the bundled 50K-row sample."""
	            return handle_load("data/tesla_deliveries_50k.csv")

	        load_btn.click(handle_load, inputs=[file_input], outputs=load_outputs)
	        sample_small_btn.click(load_sample_small, inputs=[], outputs=load_outputs)
	        sample_large_btn.click(load_sample_large, inputs=[], outputs=load_outputs)

	        # ---------- Statistics ----------
	        def show_stats(df, col_types):
	            """Build the three summary tables and the correlation heatmap."""
	            if df is None or col_types is None:
	                hidden = gr.update(value=None, visible=False)
	                return hidden, hidden, hidden, hidden

	            numeric_cols = col_types["numeric"]
	            categorical_cols = col_types["categorical"]

	            num_df = numeric_summary(df, numeric_cols)
	            cat_df = categorical_summary(df, categorical_cols)
	            miss_df = missing_values_report(df)
	            fig = create_correlation_heatmap(df, numeric_cols)

	            # The heatmap helper may return None; keep the plot hidden then.
	            if fig is not None:
	                corr_u = gr.update(value=fig, visible=True)
	            else:
	                corr_u = gr.update(value=None, visible=False)

	            return (
	                gr.update(value=num_df, visible=True),
	                gr.update(value=cat_df, visible=True),
	                gr.update(value=miss_df, visible=True),
	                corr_u,
	            )

	        show_stats_btn.click(
	            show_stats,
	            inputs=[df_state, col_types_state],
	            outputs=[num_stats, cat_stats, missing_stats, corr_plot],
	        )

	        # ---------- Filter metadata helpers ----------
	        def update_numeric_bounds(meta, col_name):
	            """Prefill min/max with the stored range of the chosen numeric column."""
	            if meta is None or not col_name:
	                return gr.update(value=None), gr.update(value=None)
	            info = meta["numeric"].get(col_name)
	            if not info:
	                return gr.update(value=None), gr.update(value=None)
	            return gr.update(value=info["min"]), gr.update(value=info["max"])

	        def update_categorical_values(meta, col_name):
	            """Offer the stored values of the chosen categorical column, none selected."""
	            if meta is None or not col_name:
	                return gr.update(choices=[], value=[])
	            values = meta["categorical"].get(col_name, [])
	            return gr.update(choices=values, value=[])

	        def update_date_bounds(meta, col_name):
	            """Prefill start/end with the stored range of the chosen date column."""
	            if meta is None or not col_name:
	                return gr.update(value=""), gr.update(value="")
	            info = meta["date"].get(col_name)
	            if not info:
	                return gr.update(value=""), gr.update(value="")
	            # Keep only the text before the first space of the stored bound
	            # (drops a trailing time part if the string form has one).
	            start = str(info["min"]).split(" ")[0]
	            end = str(info["max"]).split(" ")[0]
	            return gr.update(value=start), gr.update(value=end)

	        filt_num_col.change(
	            update_numeric_bounds,
	            inputs=[meta_state, filt_num_col],
	            outputs=[filt_num_min, filt_num_max],
	        )

	        filt_cat_col.change(
	            update_categorical_values,
	            inputs=[meta_state, filt_cat_col],
	            outputs=[filt_cat_vals],
	        )

	        filt_date_col.change(
	            update_date_bounds,
	            inputs=[meta_state, filt_date_col],
	            outputs=[filt_date_start, filt_date_end],
	        )

	        # ---------- Apply / export filters ----------
	        # Order matters: after ``df_state`` the entries line up with the
	        # parameters of ``_collect_filters``.
	        filter_inputs = [
	            df_state,
	            filt_num_col,
	            filt_num_min,
	            filt_num_max,
	            filt_cat_col,
	            filt_cat_vals,
	            filt_date_col,
	            filt_date_start,
	            filt_date_end,
	        ]

	        def apply_filters_wrapper(df, *filter_values):
	            """Show the first 50 rows that pass the currently selected filters."""
	            if df is None:
	                return gr.update(value=None, visible=False)

	            out_df = apply_filters(df, **_collect_filters(*filter_values))
	            return gr.update(value=out_df.head(50), visible=True)

	        apply_filters_btn.click(
	            apply_filters_wrapper,
	            inputs=filter_inputs,
	            outputs=[filtered_preview],
	        )

	        def export_filtered_data(df, *filter_values):
	            """Write the filtered rows to a CSV and return its path for download."""
	            if df is None:
	                return None

	            out_df = apply_filters(df, **_collect_filters(*filter_values))
	            # Per-call location so concurrent users do not overwrite each other.
	            path = _fresh_output_path("filtered_output.csv")
	            out_df.to_csv(path, index=False)
	            return path

	        export_filtered_btn.click(
	            export_filtered_data,
	            inputs=filter_inputs,
	            outputs=[filtered_download],
	        )

	        # ---------- Visualizations ----------
	        def make_chart(df, ctype, x, y, date_c, val, cat, agg, freq_val, col_types):
	            """Render the requested chart.

	            Returns:
	                ``(figure, png_path)``, or ``(None, None)`` when no data is
	                loaded, the columns required by the chart type are not
	                selected, or the plotting helper returns no figure.
	            """
	            if df is None:
	                return None, None

	            fig = None

	            if ctype == "Time Series" and date_c and val:
	                fig = create_time_series_plot(
	                    df,
	                    date_col=date_c,
	                    value_col=val,
	                    agg_func=agg,
	                    freq=freq_val,
	                    category_col=cat,
	                )
	            elif ctype == "Histogram" and val:
	                fig = create_distribution_plot(df, numeric_col=val, kind="hist")
	            elif ctype == "Box Plot" and val:
	                fig = create_distribution_plot(df, numeric_col=val, kind="box")
	            elif ctype == "Category Bar" and cat:
	                fig = create_category_bar_plot(
	                    df, category_col=cat, value_col=val, agg_func=agg
	                )
	            elif ctype == "Scatter" and x and y:
	                fig = create_scatter_plot(df, x_col=x, y_col=y, category_col=cat)
	            elif ctype == "Correlation Heatmap":
	                if col_types is None:
	                    col_types = detect_column_types(df)
	                fig = create_correlation_heatmap(df, col_types["numeric"])

	            if fig is None:
	                return None, None

	            # Render first so a failed render leaves no empty file behind.
	            png_bytes = figure_to_png_bytes(fig)
	            path = _fresh_output_path("chart_output.png")
	            with open(path, "wb") as f:
	                f.write(png_bytes)

	            return fig, path

	        viz_btn.click(
	            make_chart,
	            inputs=[
	                df_state,
	                chart_type,
	                x_col,
	                y_col,
	                date_col,
	                value_col,
	                category_col,
	                agg_func,
	                freq,
	                col_types_state,
	            ],
	            outputs=[viz_plot, viz_download],
	        )

	        # ---------- Insights ----------
	        def get_insights(df):
	            """Compute the insight tables and the quarterly trend summary.

	            The column names used here ("Model", "Estimated_Deliveries",
	            "Production_Units", "Date") are hard-coded, so an uploaded
	            dataset needs those columns for the insights to work.
	            """
	            hidden = gr.update(value=None, visible=False)
	            if df is None:
	                msg = "No data loaded. Please load a dataset in the Data Upload tab first."
	                return hidden, hidden, hidden, hidden, msg

	            try:
	                # 1) Region ranking
	                region_rank = region_ranking(df, value_col="Estimated_Deliveries")

	                # 2) Top / Bottom models (non-overlapping)
	                tb_models = top_bottom_groups(df, "Model", "Estimated_Deliveries", top_n=5)

	                # 3) Production vs deliveries by model
	                model_comp = model_production_vs_delivery(
	                    df,
	                    model_col="Model",
	                    deliveries_col="Estimated_Deliveries",
	                    prod_col="Production_Units",
	                )

	                # 4) Trend summary (quarterly)
	                summary_dict, _ = overall_trend_summary(
	                    df, "Date", "Estimated_Deliveries", freq="Q"
	                )
	            except KeyError as exc:
	                # NOTE(review): assumes the insights helpers surface a missing
	                # column as KeyError - confirm against insights.py.
	                msg = (
	                    f"Could not generate insights: {exc} was not found in the "
	                    "loaded dataset. Insights expect the Tesla sample columns."
	                )
	                return hidden, hidden, hidden, hidden, msg

	            return (
	                gr.update(value=region_rank, visible=True),
	                gr.update(value=tb_models["top"], visible=True),
	                gr.update(value=tb_models["bottom"], visible=True),
	                gr.update(value=model_comp, visible=True),
	                dict_to_text(summary_dict),
	            )

	        insights_btn.click(
	            get_insights,
	            inputs=[df_state],
	            outputs=[region_rank_tb, model_top_tb, model_bottom_tb, model_comp_tb, trend_text],
	        )

	    return demo


	if __name__ == "__main__":
	    # Build and serve the app only when this file is executed directly;
	    # importing the module just defines the functions above.
	    demo = create_dashboard()
	    demo.launch()