# Author: Saumith devarsetty
# Refactor: Consolidated insights, updated visualizations to Plotly, and updated README
# Commit: 4032138
"""
My BI Dashboard - Main Application
Built with Gradio for interactive data exploration and analysis.
"""
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from typing import Optional, Tuple
import os
# My custom modules for data processing and visualization
from data_processor import (
load_data, get_data_summary, get_correlation_matrix,
apply_filters, clean_data, aggregate_data, get_data_preview
)
from visualizations import (
create_plotly_timeseries, create_plotly_distribution, create_plotly_category,
create_plotly_scatter, create_plotly_heatmap
)
from insights import (
generate_all_insights, format_insights_for_display,
generate_visualization_insights, generate_advanced_insights, format_advanced_insights,
generate_smart_dashboard, compare_datasets
)
from utils import get_column_types, get_missing_value_summary, get_dataframe_info
# Module-level state shared by all Gradio callbacks in this file.
current_df: Optional[pd.DataFrame] = None   # full dataset as last loaded
filtered_df: Optional[pd.DataFrame] = None  # current_df after the user's filters are applied
def update_preview_pagination(offset=0):
    """Return a 10-row page of the loaded dataset starting at ``offset``.

    Args:
        offset: requested starting row; clamped into a valid page boundary.

    Returns:
        (status message, preview DataFrame, clamped offset), or
        ("No data loaded.", None, 0) when nothing has been uploaded yet.
    """
    global current_df
    if current_df is None:
        return "No data loaded.", None, 0
    n_rows = len(current_df)
    # Clamp the offset: negative values snap to the start, overshoot snaps
    # back to the final (possibly partial) page of 10 rows.
    if offset < 0:
        offset = 0
    elif offset >= n_rows:
        offset = max(0, n_rows - (n_rows % 10 or 10))
    page = current_df.iloc[offset:offset + 10]
    last_row = min(offset + 10, n_rows)
    status = (
        f"Dataset Loaded Successfully! βœ… "
        f"(Rows {offset+1}-{last_row} of {n_rows})"
    )
    return status, page, offset
def upload_and_preview_data(file):
    """Handles file upload and shows preview.

    Args:
        file: the uploaded Gradio file object (exposes ``.name`` as a path),
            or None when nothing was uploaded.

    Returns:
        20 positional values: status message, preview DataFrame, row count,
        preview offset, followed by 16 component updates that repopulate the
        filter and visualization dropdowns. Error paths return the message
        plus 19 ``None`` placeholders so the output count still matches the
        button wiring.

    Side effects: rebinds the module-level ``current_df`` and ``filtered_df``.
    """
    global current_df, filtered_df
    if file is None:
        return (
            "Please upload a file.", None, 0, 0, # status, preview, row_count, offset
            gr.update(value=None), gr.update(value=None), gr.update(value=None), # num filters
            gr.update(choices=[], value=None), gr.update(choices=[]), # cat filter 1
            gr.update(choices=[], value=None), gr.update(choices=[]), # cat filter 2
            gr.update(choices=[], value=None), gr.update(value=""), gr.update(value=""), # date filter
            gr.update(choices=[], value=None), gr.update(choices=[], value=None), gr.update(choices=[], value=None), # viz cols
            gr.update(choices=[], value=None), gr.update(choices=[], value=None), # comp cols
            gr.update(choices=[], value=None) # drill col
        )
    try:
        # Load data via the shared project loader; it returns (df, error).
        df, error = load_data(file.name)
        if error:
            return [f"Error: {error}"] + [None]*19
        if df is not None:
            current_df = df
            filtered_df = df.copy()
            # Get column types (numerical / categorical / datetime buckets)
            col_types = get_column_types(df)
            # Get preview (first 10 rows)
            preview = df.head(10)
            status_msg = f"Dataset Loaded Successfully! βœ… (Rows 1-{min(10, len(df))} of {len(df)})"
            # Get all columns
            all_cols = df.columns.tolist()
            # NOTE: output order below must match the button's outputs wiring exactly.
            return (
                status_msg, preview, len(df), 0, # status, preview, row_count, offset
                gr.update(choices=col_types['numerical'], value=col_types['numerical'][0] if col_types['numerical'] else None), # num_col
                gr.update(value=None), gr.update(value=None), # num_min, num_max
                gr.update(choices=col_types['categorical'], value=col_types['categorical'][0] if col_types['categorical'] else None), # cat_col
                gr.update(choices=[]), # cat_vals
                gr.update(choices=col_types['categorical'], value=None), # cat_col_2
                gr.update(choices=[]), # cat_vals_2
                gr.update(choices=col_types['datetime'], value=col_types['datetime'][0] if col_types['datetime'] else None), # date_col
                gr.update(value=""), gr.update(value=""), # date_start, date_end
                gr.update(choices=all_cols, value=None), # x_col
                gr.update(choices=all_cols, value=None), # y_col
                gr.update(choices=all_cols, value=None), # color_col
                gr.update(choices=col_types['categorical'], value=None), # comp_a
                gr.update(choices=col_types['categorical'], value=None), # comp_b
                gr.update(choices=col_types['categorical'], value=None) # drill
            )
        else:
            return ["Error loading file."] + [None]*19
    except Exception as e:
        return [f"Error: {str(e)}"] + [None]*19
def _validate_loaded_dataframe(df):
    """Sanity-check a freshly loaded DataFrame.

    Returns:
        (bool, str): (is_valid, error_message); the message is empty when valid.
    """
    if df is None:
        return False, "no data could be loaded"
    if len(df.columns) == 0:
        return False, "the dataset has no columns"
    if df.empty:
        return False, "the dataset has no rows"
    return True, ""


def _detect_datetime_like_columns(df):
    """Heuristically find text columns that look like dates.

    A column qualifies when at least 80% of a small non-null sample parses
    with ``pandas.to_datetime``. Returns the list of matching column names.
    """
    candidates = []
    for col in df.select_dtypes(include=["object"]).columns:
        sample = df[col].dropna().head(20)
        if sample.empty:
            continue
        try:
            parsed = pd.to_datetime(sample, errors="coerce")
        except Exception:
            continue
        if parsed.notna().mean() >= 0.8:
            candidates.append(col)
    return candidates


def load_from_path_or_url(path_or_url):
    """Load a dataset from a local file path or an HTTP(S) URL.

    Args:
        path_or_url: local path handled by ``load_data``, or an http(s) URL
            read directly with pandas (extension picks CSV vs Excel).

    Returns:
        9 values: summary markdown, preview DataFrame, status message, then
        six ``gr.update`` objects refreshing the visualization and filter
        dropdowns. Error paths return the message plus empty placeholders.

    Side effects: rebinds the module-level ``current_df``/``filtered_df``.

    Fix: the original called ``validate_dataframe`` and
    ``detect_datetime_columns``, which were never imported (NameError on
    every successful load); equivalent private helpers are used instead.
    """
    global current_df, filtered_df
    if not path_or_url or path_or_url.strip() == "":
        return "Please enter a file path or URL.", None, "", gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
    try:
        if path_or_url.startswith('http://') or path_or_url.startswith('https://'):
            # Remote source: choose the reader from the extension, CSV by default.
            if path_or_url.endswith(('.xlsx', '.xls')):
                df = pd.read_excel(path_or_url)
            else:
                df = pd.read_csv(path_or_url)
        else:
            # Local source: delegate to the shared project loader.
            df, error = load_data(path_or_url)
            if error:
                return f"Error: {error}", None, "", gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
        # Validate before touching global state.
        is_valid, error_msg = _validate_loaded_dataframe(df)
        if not is_valid:
            return f"Invalid data: {error_msg}", None, "", gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
        # Auto-detect and convert datetime-looking text columns (best effort).
        for col in _detect_datetime_like_columns(df):
            try:
                df[col] = pd.to_datetime(df[col], errors='coerce')
            except Exception:
                pass  # leave the column untouched if conversion fails
        current_df = df
        filtered_df = df.copy()
        # Build the summary card from project helpers.
        info = get_dataframe_info(df)
        col_types = get_column_types(df)
        summary = f"""
## Dataset Loaded Successfully! βœ…
**Source:** {path_or_url}
**Basic Information:**
- Rows: {info['rows']:,}
- Columns: {info['columns']}
- Numerical Columns: {info['numerical_columns']}
- Categorical Columns: {info['categorical_columns']}
- DateTime Columns: {info['datetime_columns']}
- Memory Usage: {info['memory_usage_mb']:.2f} MB
- Missing Values: {info['total_missing']:,} ({info['missing_percentage']:.2f}%)
**Column Names:**
{', '.join(df.columns.tolist())}
"""
        preview = df.head(10)
        all_cols = df.columns.tolist()
        # Order must match the caller's outputs wiring.
        return (
            summary,
            preview,
            "Data loaded successfully from path/URL!",
            gr.update(choices=all_cols, value=None),                  # x_column - clear selection
            gr.update(choices=all_cols, value=None),                  # y_column - clear selection
            gr.update(choices=all_cols, value=None),                  # color_column - clear selection
            gr.update(choices=col_types['numerical'], value=None),    # num_filter_col - clear selection
            gr.update(choices=col_types['categorical'], value=None),  # cat_filter_col - clear selection
            gr.update(choices=col_types['datetime'], value=None)      # date_filter_col - clear selection
        )
    except Exception as e:
        return f"Error loading data: {str(e)}", None, "", gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
def show_statistics():
    """Build the Statistics tab contents for the current dataset.

    Returns:
        An 8-tuple matching the Generate Statistics button's outputs:
        (rows, cols, duplicate rows, missing-cell %, numerical stats
        DataFrame, categorical stats DataFrame, missing-values DataFrame,
        correlation heatmap figure).

    Fix: the no-data early return previously yielded only 7 values — one
    short of the 8 wired outputs — which broke the callback.
    """
    global current_df
    if current_df is None:
        return 0, 0, 0, 0, None, None, None, None
    summary = get_data_summary(current_df)
    # 1. Headline metric cards.
    rows = summary['shape'][0]
    cols = summary['shape'][1]
    dupes = summary.get('duplicate_rows', 0)
    total_cells = rows * cols
    missing_cells = sum(summary['missing_values'].values())
    missing_pct = (missing_cells / total_cells * 100) if total_cells > 0 else 0
    # 2. Numerical stats, transposed so each feature is one row.
    if 'numerical_stats' in summary:
        num_df = pd.DataFrame(summary['numerical_stats']).T
        num_df = num_df.reset_index().rename(columns={'index': 'Feature'})
        for col in num_df.columns:
            if col != 'Feature':
                num_df[col] = num_df[col].apply(lambda x: round(x, 2))
    else:
        num_df = pd.DataFrame(columns=["No numerical columns found"])
    # 3. Categorical stats: unique counts and the most common value per feature.
    if 'categorical_stats' in summary:
        cat_data = []
        for col, stats in summary['categorical_stats'].items():
            top_val = list(stats['top_values'].keys())[0] if stats['top_values'] else "N/A"
            top_count = list(stats['top_values'].values())[0] if stats['top_values'] else 0
            cat_data.append({
                'Feature': col,
                'Unique Values': stats['unique_count'],
                'Most Common': top_val,
                'Count': top_count,
                # Guard against a zero-row dataset to avoid ZeroDivisionError.
                'Share (%)': round(top_count / rows * 100, 1) if rows else 0.0
            })
        cat_df = pd.DataFrame(cat_data)
    else:
        cat_df = pd.DataFrame(columns=["No categorical columns found"])
    # 4. Missing-value report, worst columns first.
    missing_data = [
        {
            'Feature': col,
            'Missing Count': count,
            'Missing (%)': round(count / rows * 100, 2)
        }
        for col, count in summary['missing_values'].items()
        if count > 0
    ]
    if missing_data:
        missing_df = pd.DataFrame(missing_data).sort_values('Missing Count', ascending=False)
    else:
        missing_df = pd.DataFrame(columns=["No missing values found"])
    # 5. Correlation heatmap over the numerical columns.
    corr_plot = create_plotly_heatmap(current_df, title="Correlation Matrix")
    return rows, cols, dupes, round(missing_pct, 2), num_df, cat_df, missing_df, corr_plot
def update_filter_options():
    """Repopulate the three filter dropdowns from the loaded dataset.

    Returns numerical, categorical, and datetime choice updates in that
    order; all three are emptied when no dataset is loaded.
    """
    global current_df
    if current_df is None:
        return (
            gr.update(choices=[]),
            gr.update(choices=[]),
            gr.update(choices=[]),
        )
    kinds = get_column_types(current_df)
    return tuple(
        gr.update(choices=kinds[bucket])
        for bucket in ('numerical', 'categorical', 'datetime')
    )
def apply_filters_and_update(num_col, num_min, num_max, cat_col, cat_vals, cat_col_2, cat_vals_2, date_col, date_start, date_end, offset=0):
    """Apply filters and return summary and preview.

    Args:
        num_col/num_min/num_max: numerical range filter (either bound optional).
        cat_col/cat_vals, cat_col_2/cat_vals_2: up to two categorical
            include-lists (rows must match one of the checked values).
        date_col/date_start/date_end: date range filter ("YYYY-MM-DD" strings).
        offset: starting row for the 20-row paginated preview.

    Returns:
        (markdown summary, preview DataFrame, filtered row count, clamped offset).

    Side effect: rebinds the module-level ``filtered_df`` used by the
    Visualizations and Insights tabs.
    """
    global filtered_df, current_df
    if current_df is None:
        return "No data loaded.", None, 0, offset
    # Construct the filters dictionary consumed by data_processor.apply_filters.
    filters = {}
    # Numerical filter
    if num_col and (num_min is not None or num_max is not None):
        filters[num_col] = {
            'type': 'numerical',
            'min': num_min,
            'max': num_max
        }
    # Categorical filter 1
    if cat_col and cat_vals:
        filters[cat_col] = {
            'type': 'categorical',
            'values': cat_vals
        }
    # Categorical filter 2
    if cat_col_2 and cat_vals_2:
        filters[cat_col_2] = {
            'type': 'categorical',
            'values': cat_vals_2
        }
    # Date filter
    if date_col and (date_start or date_end):
        filters[date_col] = {
            'type': 'datetime',
            'start_date': date_start,
            'end_date': date_end
        }
    # Apply filters
    filtered_df = apply_filters(current_df, filters)
    # Create summary
    summary_lines = [
        "## Filtered Data",
        f"**Original rows:** {len(current_df):,}",
        f"**Filtered rows:** {len(filtered_df):,}",
        f"**Rows removed:** {len(current_df) - len(filtered_df):,}"
    ]
    # Add breakdown for categorical filter 1
    if cat_col and cat_vals and not filtered_df.empty:
        try:
            counts = filtered_df[cat_col].value_counts()
            summary_lines.append(f"\n**{cat_col} Breakdown:**")
            for val in cat_vals:
                if val in counts:
                    summary_lines.append(f"- {val}: {counts[val]:,}")
                else:
                    summary_lines.append(f"- {val}: 0")
        except:  # NOTE(review): breakdown is best-effort; broad except keeps failures silent
            pass
    # Add breakdown for categorical filter 2
    if cat_col_2 and cat_vals_2 and not filtered_df.empty:
        try:
            counts = filtered_df[cat_col_2].value_counts()
            summary_lines.append(f"\n**{cat_col_2} Breakdown:**")
            for val in cat_vals_2:
                if val in counts:
                    summary_lines.append(f"- {val}: {counts[val]:,}")
                else:
                    summary_lines.append(f"- {val}: 0")
        except:  # NOTE(review): same best-effort behavior as filter 1
            pass
    summary = "\n".join(summary_lines)
    # Pagination Logic: clamp overshoot back to the last 20-row page.
    total_rows = len(filtered_df)
    if offset >= total_rows:
        offset = max(0, total_rows - (total_rows % 20 or 20))
    elif offset < 0:
        offset = 0
    preview = filtered_df.iloc[offset : offset + 20]
    # Add pagination info to summary
    if total_rows > 0:
        summary += f"\n\n**Showing Rows:** {offset+1}-{min(offset+20, total_rows)}"
    return summary, preview, len(filtered_df), offset
def create_visualization(viz_type, x_col, y_col, color_col, agg_method, top_n, offset=0):
    """Create the requested chart from the filtered dataset plus AI insights.

    Args:
        viz_type: one of the chart-type labels offered by the UI dropdown.
        x_col, y_col, color_col: column selections (some charts ignore some).
        agg_method: aggregation for grouped charts ('sum', 'mean', ...).
        top_n: category page size for Bar/Pie charts.
        offset: category offset used by Bar Chart pagination.

    Returns:
        (figure or None, status message, insights markdown, clamped offset).

    Fix: removed an unreachable duplicate "Correlation Heatmap" branch and
    added an explicit error for unrecognized chart types (previously fell
    through and returned no figure with a success message). The insights
    tail that the Bar Chart branch duplicated is now shared by all branches.
    """
    global filtered_df
    if filtered_df is None or len(filtered_df) == 0:
        return None, "Please upload and filter data first.", "", offset
    try:
        status_msg = "Visualization created successfully!"
        if viz_type == "Time Series":
            if not x_col or not y_col:
                return None, "Please select Date (X) and Value (Y) columns.", "", offset
            fig = create_plotly_timeseries(filtered_df, x_col, y_col, agg_method)
        elif viz_type == "Distribution (Histogram)":
            if not x_col:
                return None, "Please select Column.", "", offset
            fig = create_plotly_distribution(filtered_df, x_col)
        elif viz_type == "Correlation Heatmap":
            fig = create_plotly_heatmap(filtered_df)
            if fig is None:
                return None, "Need at least 2 numerical columns for correlation.", "", offset
        elif viz_type == "Distribution (Box Plot)":
            if not x_col:
                return None, "Please select Column.", "", offset
            fig = px.box(filtered_df, x=x_col, y=y_col, color=color_col, title=f"Distribution of {x_col}")
        elif viz_type == "Bar Chart":
            if not x_col:
                return None, "Please select X-Axis column.", "", offset
            # Clamp the pagination offset to the available categories.
            total_items = filtered_df[x_col].nunique()
            if offset >= total_items and total_items > 0:
                offset = max(0, total_items - (total_items % top_n or top_n))
                status_msg = f"Reached end of data. Showing items {offset+1}-{total_items}."
            elif offset < 0:
                offset = 0
                status_msg = "Start of data."
            fig = create_plotly_category(filtered_df, x_col, y_col, agg_method, top_n, offset=offset)
        elif viz_type == "Pie Chart":
            if not x_col:
                return None, "Please select Category column.", "", offset
            data = filtered_df[x_col].value_counts().head(top_n)
            fig = px.pie(values=data.values, names=data.index, title=f"Top {top_n} {x_col}")
        elif viz_type == "Scatter Plot":
            if not x_col or not y_col:
                return None, "Please select X and Y columns.", "", offset
            fig = create_plotly_scatter(filtered_df, x_col, y_col, color_col)
        else:
            return None, f"Unknown chart type: {viz_type}", "", offset
        # Attach auto-generated insights for whichever chart was built.
        insights = generate_visualization_insights(viz_type, filtered_df, x_col, y_col)
        insights_text = f"## πŸ“Š Visualization Insights\n\n{insights}"
        return fig, status_msg, insights_text, offset
    except Exception as e:
        return None, f"Error: {str(e)}", "", offset
def generate_insights_report():
    """Run the automated insight engine on the filtered data and format it.

    Returns the formatted insight markdown, or an instructional/error
    message when no data is loaded or generation fails.
    """
    global filtered_df
    if filtered_df is None or len(filtered_df) == 0:
        return "Please upload data first."
    try:
        report = generate_all_insights(filtered_df)
        return format_insights_for_display(report)
    except Exception as e:
        return f"Error generating insights: {str(e)}"
def export_filtered_data():
    """Write the currently filtered rows to a CSV file in the working dir.

    Returns:
        (path, status message), or (None, message) when nothing is loaded.
    """
    global filtered_df
    if filtered_df is None:
        return None, "No data to export."
    destination = "filtered_data_export.csv"
    filtered_df.to_csv(destination, index=False)
    status = f"Data exported successfully! ({len(filtered_df)} rows)"
    return destination, status
def export_visualization(fig):
    """Export the given figure to ``visualization_export.png``.

    Args:
        fig: a Plotly figure (has ``write_image``) or a matplotlib figure
            (has ``savefig``); None when nothing has been plotted yet.

    Returns:
        (path, status message) on success, (None, message) otherwise.

    Fix: charts in this app are Plotly figures, but the original called the
    matplotlib-only ``fig.savefig`` — an AttributeError on every export.
    Plotly export goes through ``write_image`` (requires the kaleido
    package); matplotlib figures still fall back to ``savefig``.
    """
    if fig is None:
        return None, "No visualization to export."
    output_path = "visualization_export.png"
    try:
        if hasattr(fig, "write_image"):
            # Plotly figure: static image export via kaleido.
            fig.write_image(output_path)
        else:
            # matplotlib figure fallback.
            fig.savefig(output_path, dpi=300, bbox_inches='tight')
    except Exception as e:
        return None, f"Error exporting visualization: {str(e)}"
    return output_path, "Visualization exported successfully!"
def create_dashboard():
"""Creates my main Gradio dashboard interface."""
with gr.Blocks(title="Business Intelligence Dashboard") as demo:
gr.Markdown("""
# πŸ“Š Business Intelligence Dashboard
### Professional Data Analysis & Visualization Platform
Upload your data, explore insights, create visualizations, and export results.
""")
# Dropdowns that will be populated across tabs
x_column_viz = None
y_column_viz = None
color_column_viz = None
num_filter_dropdown = None
cat_filter_dropdown = None
cat_filter_dropdown_2 = None
date_filter_dropdown = None
comp_cat_col_a = None
comp_cat_col_b = None
comp_cat_val_a = None
comp_cat_val_b = None
drill_col = None
drill_val = None
# Tab 1: Data Upload
with gr.Tab("πŸ“ Data Upload"):
# Welcome Banner Removed
gr.Markdown("### πŸ“€ Upload File")
file_input = gr.File(label="Drop CSV or Excel file here", file_types=[".csv", ".xlsx", ".xls"], height=100)
upload_btn = gr.Button("Load Data", variant="primary", size="lg")
# Status & Preview Section
gr.Markdown("### πŸ“‹ Data Status")
with gr.Row():
upload_message = gr.Textbox(label="System Status", value="Waiting for data...", interactive=False)
upload_status = gr.Markdown()
with gr.Accordion("πŸ‘€ Data Preview", open=True):
data_preview = gr.Dataframe(interactive=False)
# Pagination Controls for Data Preview
with gr.Row():
prev_preview_btn = gr.Button("⬅️ Prev Batch", size="sm")
next_preview_btn = gr.Button("Next Batch ➑️", size="sm")
preview_offset = gr.State(value=0)
preview_batch_size = gr.State(value=10)
# Tab 2: Statistics
with gr.Tab("πŸ“ˆ Statistics"):
gr.Markdown("## πŸ“Š Data Health & Statistics")
stats_btn = gr.Button("Generate Statistics", variant="primary")
# Metric Cards Row
with gr.Row():
stat_rows = gr.Number(label="Total Rows", value=0)
stat_cols = gr.Number(label="Total Columns", value=0)
stat_dupes = gr.Number(label="Duplicate Rows", value=0)
stat_missing = gr.Number(label="Missing Cells (%)", value=0)
gr.Markdown("### πŸ”’ Numerical Statistics")
numerical_stats = gr.Dataframe(label="Descriptive Statistics (Transposed)", interactive=False)
with gr.Row():
with gr.Column():
gr.Markdown("### πŸ“‹ Categorical Summary")
categorical_stats = gr.Dataframe(label="Top Categories", interactive=False)
with gr.Column():
gr.Markdown("### ⚠️ Missing Values Report")
missing_stats = gr.Dataframe(label="Missing Data by Column", interactive=False)
gr.Markdown("### πŸ”₯ Correlation Matrix")
corr_matrix_plot = gr.Plot(label="Correlation Matrix")
stats_btn.click(
fn=show_statistics,
inputs=[],
outputs=[stat_rows, stat_cols, stat_dupes, stat_missing, numerical_stats, categorical_stats, missing_stats, corr_matrix_plot]
)
# Tab 3: Filter & Explore
with gr.Tab("πŸ” Filter & Explore"):
gr.Markdown("## Interactive Data Filtering")
gr.Markdown("""
**How to use filters:**
1. Select a column from any filter section below
2. Set your filter criteria (range, values, or dates)
3. Click "Apply Filters" to see filtered results
4. You can combine multiple filters together
5. The filtered data will be used in Visualizations and Insights tabs
""")
with gr.Row():
with gr.Column():
gr.Markdown("### πŸ”’ Numerical Filters")
gr.Markdown("*Filter by number ranges (e.g., Sales, Price, Quantity)*")
num_filter_dropdown = gr.Dropdown(label="Select Numerical Column", choices=[], interactive=True)
with gr.Row():
num_min = gr.Number(label="Minimum Value", placeholder="Min")
num_max = gr.Number(label="Maximum Value", placeholder="Max")
with gr.Column():
gr.Markdown("### πŸ“‹ Categorical Filters")
gr.Markdown("*Filter by categories (e.g., Product, Region)*")
cat_filter_dropdown = gr.Dropdown(label="Select Categorical Column 1", choices=[], interactive=True)
cat_filter_values = gr.CheckboxGroup(label="Select Values to Include", choices=[])
gr.Markdown("---")
cat_filter_dropdown_2 = gr.Dropdown(label="Select Categorical Column 2 (Optional)", choices=[], interactive=True)
cat_filter_values_2 = gr.CheckboxGroup(label="Select Values to Include", choices=[])
# Date filters in accordion (optional)
with gr.Accordion("πŸ“… Date Filters (Optional - Click to Expand)", open=False):
gr.Markdown("*Filter by date ranges (format: YYYY-MM-DD)*")
date_filter_dropdown = gr.Dropdown(label="Select Date Column", choices=[], interactive=True)
with gr.Row():
date_start = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD")
date_end = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD")
with gr.Row():
filter_btn = gr.Button("Apply Filters", variant="primary", size="lg")
reset_filter_btn = gr.Button("Reset Filters", variant="secondary", size="lg")
gr.Markdown("### Filter Results")
filter_summary = gr.Markdown()
with gr.Accordion("Filtered Data Preview", open=True):
filtered_preview = gr.Dataframe(label="Filtered Data Preview")
# Pagination Controls for Filtered Preview
with gr.Row():
prev_filtered_btn = gr.Button("⬅️ Prev Batch", size="sm")
next_filtered_btn = gr.Button("Next Batch ➑️", size="sm")
filtered_offset = gr.State(value=0)
filtered_batch_size = gr.State(value=20)
row_count = gr.Number(label="Total Filtered Rows", interactive=False)
# Update categorical values when column is selected
def update_cat_values(col):
if current_df is not None and col:
values = current_df[col].unique().tolist()
# Reset value to None to avoid "value not in choices" error
return gr.update(choices=values, value=None)
return gr.update(choices=[], value=None)
cat_filter_dropdown.change(
fn=update_cat_values,
inputs=[cat_filter_dropdown],
outputs=[cat_filter_values]
)
cat_filter_dropdown_2.change(
fn=update_cat_values,
inputs=[cat_filter_dropdown_2],
outputs=[cat_filter_values_2]
)
filter_btn.click(
fn=apply_filters_and_update,
inputs=[num_filter_dropdown, num_min, num_max, cat_filter_dropdown,
cat_filter_values, cat_filter_dropdown_2, cat_filter_values_2,
date_filter_dropdown, date_start, date_end],
outputs=[filter_summary, filtered_preview, row_count, filtered_offset]
)
# Reset filters function
def reset_filters():
global filtered_df, current_df
if current_df is not None:
filtered_df = current_df.copy()
preview = filtered_df.head(20)
return (
gr.update(value=None), # num_col
gr.update(value=None), # num_min
gr.update(value=None), # num_max
gr.update(value=None), # cat_col
gr.update(choices=[]), # cat_values
gr.update(value=None), # cat_col_2
gr.update(choices=[]), # cat_values_2
gr.update(value=None), # date_col
gr.update(value=""), # date_start
gr.update(value=""), # date_end
"Filters reset. Showing original data.",
preview,
len(filtered_df)
)
return [gr.update()] * 10 + ["No data loaded.", None, 0]
# Pagination Logic for Filtered Preview
def update_filtered_offset(current_offset, direction, batch_size=20):
if direction == "next":
return current_offset + batch_size
else:
return max(0, current_offset - batch_size)
prev_filtered_btn.click(
fn=update_filtered_offset,
inputs=[filtered_offset, gr.State("prev"), filtered_batch_size],
outputs=[filtered_offset]
).then(
fn=apply_filters_and_update,
inputs=[
num_filter_dropdown, num_min, num_max,
cat_filter_dropdown, cat_filter_values,
cat_filter_dropdown_2, cat_filter_values_2,
date_filter_dropdown, date_start, date_end,
filtered_offset
],
outputs=[filter_summary, filtered_preview, row_count, filtered_offset]
)
next_filtered_btn.click(
fn=update_filtered_offset,
inputs=[filtered_offset, gr.State("next"), filtered_batch_size],
outputs=[filtered_offset]
).then(
fn=apply_filters_and_update,
inputs=[
num_filter_dropdown, num_min, num_max,
cat_filter_dropdown, cat_filter_values,
cat_filter_dropdown_2, cat_filter_values_2,
date_filter_dropdown, date_start, date_end,
filtered_offset
],
outputs=[filter_summary, filtered_preview, row_count, filtered_offset]
)
# Update filter button to reset offset
filter_btn.click(
fn=lambda *args: apply_filters_and_update(*args, offset=0),
inputs=[
num_filter_dropdown, num_min, num_max,
cat_filter_dropdown, cat_filter_values,
cat_filter_dropdown_2, cat_filter_values_2,
date_filter_dropdown, date_start, date_end
],
outputs=[filter_summary, filtered_preview, row_count, filtered_offset]
)
# Pagination Logic for Data Preview
def update_preview_offset(current_offset, direction, batch_size=10):
if direction == "next":
return current_offset + batch_size
else:
return max(0, current_offset - batch_size)
prev_preview_btn.click(
fn=update_preview_offset,
inputs=[preview_offset, gr.State("prev"), preview_batch_size],
outputs=[preview_offset]
).then(
fn=update_preview_pagination,
inputs=[preview_offset],
outputs=[upload_status, data_preview, preview_offset]
)
next_preview_btn.click(
fn=update_preview_offset,
inputs=[preview_offset, gr.State("next"), preview_batch_size],
outputs=[preview_offset]
).then(
fn=update_preview_pagination,
inputs=[preview_offset],
outputs=[upload_status, data_preview, preview_offset]
)
# Update upload button to reset offset
# Update upload button to reset offset
# Tab 4: Visualizations
with gr.Tab("πŸ“Š Visualizations"):
gr.Markdown("## 🎨 Create Interactive Visualizations")
with gr.Row():
# Left Column: Chart Settings
with gr.Column(scale=1):
gr.Markdown("### 1. Chart Settings")
viz_type = gr.Dropdown(
label="Select Chart Type",
choices=["Time Series", "Distribution (Histogram)", "Distribution (Box Plot)",
"Bar Chart", "Pie Chart", "Scatter Plot", "Correlation Heatmap"],
value="Bar Chart"
)
# Dynamic help text based on selection could be added here, but static for now
gr.Markdown("""
<div style="font-size: 12px; color: #888; margin-bottom: 10px;">
<b>Guide:</b><br>
β€’ <b>Bar/Pie</b>: Compare categories<br>
β€’ <b>Time Series</b>: Trends over time<br>
β€’ <b>Scatter</b>: Relationships between numbers<br>
β€’ <b>Distribution</b>: Spread of data
</div>
""")
agg_method = gr.Dropdown(
label="Aggregation (for grouped data)",
choices=["sum", "mean", "count", "median"],
value="sum"
)
top_n = gr.Slider(label="Top N Categories", minimum=5, maximum=20, value=10, step=1)
# Pagination Controls
with gr.Row():
prev_batch_btn = gr.Button("⬅️ Prev Batch", size="sm")
next_batch_btn = gr.Button("Next Batch ➑️", size="sm")
viz_offset = gr.State(value=0)
viz_btn = gr.Button("πŸš€ Create Visualization", variant="primary")
# Right Column: Data Selection
with gr.Column(scale=2):
gr.Markdown("### 2. Select Data")
with gr.Row():
x_column_viz = gr.Dropdown(label="X-Axis / Category Column", choices=[], interactive=True)
y_column_viz = gr.Dropdown(label="Y-Axis / Value Column", choices=[], interactive=True)
color_column_viz = gr.Dropdown(label="Color / Grouping (Optional)", choices=[], interactive=True)
# Visualization Output Area
with gr.Row():
with gr.Column(scale=3):
viz_plot = gr.Plot(label="Interactive Chart")
with gr.Column(scale=1):
gr.Markdown("### πŸ’‘ AI Insights")
viz_insights = gr.Markdown(value="*Insights will appear here after generating a chart.*")
viz_status = gr.Textbox(label="Status", interactive=False, visible=True)
# Wrapper to reset offset when creating new visualization
def create_viz_reset(viz_type, x_col, y_col, color_col, agg_method, top_n):
return create_visualization(viz_type, x_col, y_col, color_col, agg_method, top_n, offset=0)
viz_btn.click(
fn=create_viz_reset,
inputs=[viz_type, x_column_viz, y_column_viz, color_column_viz, agg_method, top_n],
outputs=[viz_plot, viz_status, viz_insights, viz_offset]
)
# Export Toolbar
with gr.Row(variant="panel"):
with gr.Column(scale=3):
gr.Markdown("**Export Options:**")
with gr.Column(scale=1):
export_viz_btn = gr.Button("πŸ’Ύ Download PNG", size="sm")
export_viz_file = gr.File(label="Download File", visible=False)
export_viz_status = gr.Textbox(visible=False)
# Pagination Logic
def update_viz_offset(current_offset, direction, top_n):
if direction == "next":
return current_offset + top_n
else:
return max(0, current_offset - top_n)
prev_batch_btn.click(
fn=update_viz_offset,
inputs=[viz_offset, gr.State("prev"), top_n],
outputs=[viz_offset]
).then(
fn=create_visualization,
inputs=[viz_type, x_column_viz, y_column_viz, color_column_viz, agg_method, top_n, viz_offset],
outputs=[viz_plot, viz_status, viz_insights, viz_offset]
)
next_batch_btn.click(
fn=update_viz_offset,
inputs=[viz_offset, gr.State("next"), top_n],
outputs=[viz_offset]
).then(
fn=create_visualization,
inputs=[viz_type, x_column_viz, y_column_viz, color_column_viz, agg_method, top_n, viz_offset],
outputs=[viz_plot, viz_status, viz_insights, viz_offset]
)
export_viz_btn.click(
fn=lambda: export_visualization(viz_plot.value) if viz_plot.value else (None, "No visualization to export"),
inputs=[],
outputs=[export_viz_file, export_viz_status]
)
# Show file download when ready
def show_download(file, status):
return gr.update(visible=True), status
export_viz_btn.click(
fn=show_download,
inputs=[export_viz_file, export_viz_status],
outputs=[export_viz_file, export_viz_status]
)
# Tab 5: Insights
with gr.Tab("πŸ’‘ Insights"):
gr.Markdown("## 🧠 Advanced AI Insights")
gr.Markdown("Deep dive analysis of your data's performance, drivers, and risks.")
with gr.Row():
insights_source = gr.Radio(
choices=["Full Dataset", "Filtered Data"],
value="Filtered Data",
label="Analysis Scope",
info="Choose whether to analyze the entire dataset or just the filtered subset."
)
insights_btn = gr.Button("Generate Advanced Insights", variant="primary", size="lg")
# Executive Summary Row
gr.Markdown("### πŸ“‹ Executive Summary")
with gr.Row():
exec_card1 = gr.Markdown()
exec_card2 = gr.Markdown()
exec_card3 = gr.Markdown()
# Detailed Analysis Rows
with gr.Row():
with gr.Column():
gr.Markdown("### πŸ† Pareto Analysis (80/20 Rule)")
pareto_output = gr.Markdown("*Click Generate to see vital few categories*")
with gr.Column():
gr.Markdown("### πŸ”‘ Key Drivers")
drivers_output = gr.Markdown("*Click Generate to see what drives your metrics*")
with gr.Row():
with gr.Column():
gr.Markdown("### πŸ“Š Segment Performance")
segments_output = gr.Markdown("*Click Generate to see segment analysis*")
with gr.Column():
gr.Markdown("### ⚠️ Anomalies & Risks")
anomalies_output = gr.Markdown("*Click Generate to see detected anomalies*")
with gr.Accordion("πŸ“„ View Full Report", open=False):
full_report_output = gr.Markdown("Generate insights to see the full report.")
def update_insights(source):
# from advanced_insights import generate_advanced_insights, format_advanced_insights
global filtered_df, current_df
target_df = filtered_df if source == "Filtered Data" else current_df
if target_df is None:
return ["Please upload data first."] * 8
try:
insights = generate_advanced_insights(target_df)
# Format Executive Summary cards
cards = insights.get('executive_summary', [])
card_outputs = []
for card in cards:
html = f"""
<div style="background-color: #2b2b2b; padding: 20px; border-radius: 10px; border: 1px solid #444;">
<div style="font-size: 24px; margin-bottom: 10px;">{card['icon']}</div>
<div style="color: #888; font-size: 14px; text-transform: uppercase;">{card['title']}</div>
<div style="color: white; font-size: 28px; font-weight: bold; margin: 5px 0;">{card['value']}</div>
<div style="color: #aaa; font-size: 12px;">{card['description']}</div>
</div>
"""
card_outputs.append(html)
# Fill remaining cards if less than 3
while len(card_outputs) < 3:
card_outputs.append("")
# Format Pareto
pareto = insights.get('pareto_analysis', [])
if pareto:
pareto_text = ""
for p in pareto:
pareto_text += f"#### {p['category']} Analysis\n"
pareto_text += f"{p['insight']}\n\n"
pareto_text += "**Vital Few (Top 5):**\n" + ", ".join([f"`{x}`" for x in p['vital_few']]) + "\n\n---\n"
else:
pareto_text = "No significant Pareto patterns found."
# Format Drivers
drivers = insights.get('key_drivers', [])
if drivers:
drivers_text = ""
for d in drivers:
drivers_text += f"- {d['insight']}\n"
else:
drivers_text = "No strong correlations found to identify key drivers."
# Format Segments
segments = insights.get('segment_analysis', [])
if segments:
seg_text = ""
for s in segments:
seg_text += f"- {s['insight']}\n"
else:
seg_text = "Not enough categorical data for segment analysis."
# Format Anomalies
anomalies = insights.get('anomalies', [])
if anomalies:
anom_text = ""
for a in anomalies:
anom_text += f"- {a['insight']}\n"
else:
anom_text = "βœ… No significant anomalies detected."
# Generate Full Report
full_report = format_advanced_insights(insights)
return card_outputs + [pareto_text, drivers_text, seg_text, anom_text, full_report]
except Exception as e:
return [f"Error: {str(e)}"] * 8
insights_btn.click(
fn=update_insights,
inputs=[insights_source],
outputs=[exec_card1, exec_card2, exec_card3, pareto_output, drivers_output, segments_output, anomalies_output, full_report_output]
)
# Tab 6: Smart Dashboard
with gr.Tab("πŸš€ Smart Dashboard"):
smart_btn = gr.Button("✨ Generate Smart Dashboard", variant="primary", size="lg")
with gr.Row():
with gr.Column():
plot1 = gr.Plot()
desc1 = gr.Markdown()
with gr.Column():
plot2 = gr.Plot()
desc2 = gr.Markdown()
with gr.Row():
with gr.Column():
plot3 = gr.Plot()
desc3 = gr.Markdown()
with gr.Column():
plot4 = gr.Plot()
desc4 = gr.Markdown()
with gr.Row():
with gr.Column():
plot5 = gr.Plot()
desc5 = gr.Markdown()
with gr.Column():
plot6 = gr.Plot()
desc6 = gr.Markdown()
smart_status = gr.Textbox(label="Status", interactive=False)
def update_smart_dashboard():
    """Build up to six auto-selected figures for the Smart Dashboard tab.

    Returns a flat list of 13 values: (figure, markdown description) for
    each of the 6 plot slots, followed by one status string. Unused slots
    receive (None, "") so every Gradio output component is always updated.
    """
    # Read-only access to the module-level filtered dataset; no `global`
    # declaration is needed because the name is never rebound here.
    if filtered_df is None:
        return [None] * 12 + ["Please upload data first."]
    try:
        items = generate_smart_dashboard(filtered_df)
        outputs = []
        # Fill exactly 6 slots so the output list length stays stable
        # regardless of how many visualizations were generated.
        for slot in range(6):
            if slot < len(items):
                fig, title, insight = items[slot]
                outputs.append(fig)
                outputs.append(f"### {title}\n\n{insight}")
            else:
                outputs.append(None)
                outputs.append("")
        outputs.append(f"Successfully generated {len(items)} visualizations based on your data patterns!")
        return outputs
    except Exception as e:
        # Surface failures in the status box instead of crashing the UI.
        return [None] * 12 + [f"Error generating dashboard: {str(e)}"]
# One click repopulates all six (plot, description) pairs plus the status box;
# the 13 outputs match the 13-element list returned by update_smart_dashboard.
smart_btn.click(
fn=update_smart_dashboard,
inputs=[],
outputs=[plot1, desc1, plot2, desc2, plot3, desc3, plot4, desc4, plot5, desc5, plot6, desc6, smart_status]
)
# Tab 7: Comparison
# Two (column, value) filter panels define groups A and B. Dropdown choices
# start empty and are filled once a dataset has been uploaded.
with gr.Tab("βš–οΈ Compare"):
gr.Markdown("## Head-to-Head Comparison")
gr.Markdown("Compare two segments of your data side-by-side (e.g., Region A vs Region B).")
with gr.Row():
# Group A
with gr.Column(variant="panel"):
gr.Markdown("### Group A")
comp_cat_col_a = gr.Dropdown(label="Filter Column", choices=[], interactive=True)
comp_cat_val_a = gr.Dropdown(label="Filter Value", choices=[], interactive=True)
# Group B
with gr.Column(variant="panel"):
gr.Markdown("### Group B")
comp_cat_col_b = gr.Dropdown(label="Filter Column", choices=[], interactive=True)
comp_cat_val_b = gr.Dropdown(label="Filter Value", choices=[], interactive=True)
comp_btn = gr.Button("βš”οΈ Compare Groups", variant="primary", size="lg")
gr.Markdown("### Comparison Results")
# Four HTML metric cards plus one side-by-side figure, filled by run_comparison.
with gr.Row():
comp_metric1 = gr.HTML()
comp_metric2 = gr.HTML()
comp_metric3 = gr.HTML()
comp_metric4 = gr.HTML()
comp_plot = gr.Plot(label="Side-by-Side Visualization")
# Update values when column selected
def update_comp_values(col):
    """Populate a value dropdown with the unique entries of *col*.

    NaN entries are dropped before listing: the downstream callbacks filter
    with exact equality (``df[col] == val``), which a missing value can
    never match, so offering NaN would be a dead option. Choice lists are
    capped at 1000 items to keep the UI responsive on high-cardinality
    columns.
    """
    # Read-only access to the module-level dataset; no `global` needed.
    if current_df is not None and col:
        unique_vals = current_df[col].dropna().unique().tolist()
        if len(unique_vals) > 1000:
            # Limit to first 1000 to prevent freeze
            return gr.update(choices=unique_vals[:1000], label=f"Filter Value (Showing 1000/{len(unique_vals)})")
        return gr.update(choices=unique_vals, label="Filter Value")
    return gr.update(choices=[], label="Filter Value")
# Refresh each group's value dropdown whenever its filter column changes.
comp_cat_col_a.change(update_comp_values, comp_cat_col_a, comp_cat_val_a)
comp_cat_col_b.change(update_comp_values, comp_cat_col_b, comp_cat_val_b)
def run_comparison(col_a, val_a, col_b, val_b):
    """Compare two filtered segments of the dataset head-to-head.

    Returns 4 HTML metric strings followed by a Plotly figure. A group
    with an incomplete (column without value) or empty selection falls
    back to the full dataset.
    """
    # Read-only access to the module-level dataset; no `global` needed.
    if current_df is None:
        return ["Please upload data first."] * 4 + [None]
    # Build categorical equality filters; an incomplete selection means "no filter".
    filter_a = {col_a: {'type': 'categorical', 'values': [val_a]}} if col_a and val_a else {}
    filter_b = {col_b: {'type': 'categorical', 'values': [val_b]}} if col_b and val_b else {}
    # The label must mirror the filter condition: only claim "col=val" when
    # both are set; otherwise the group is unfiltered and honestly labelled
    # "All Data" (previously `if col_a` alone could yield "col=None" for an
    # unfiltered group).
    label_a = f"{col_a}={val_a}" if col_a and val_a else "All Data"
    label_b = f"{col_b}={val_b}" if col_b and val_b else "All Data"
    metrics, fig = compare_datasets(current_df, filter_a, filter_b, label_a, label_b)
    # Pad to exactly 4 metric cards so every HTML output receives a value.
    while len(metrics) < 4:
        metrics.append("")
    return metrics[:4] + [fig]
# Run the head-to-head comparison on click. Registered exactly once: a
# duplicate registration of this same handler previously caused the
# comparison to execute twice per click.
comp_btn.click(
    fn=run_comparison,
    inputs=[comp_cat_col_a, comp_cat_val_a, comp_cat_col_b, comp_cat_val_b],
    outputs=[comp_metric1, comp_metric2, comp_metric3, comp_metric4, comp_plot]
)
# Tab 8: Segment Explorer
# Drill into a single categorical segment: headline stats plus two plots,
# all populated by run_drill_down.
with gr.Tab("πŸ” Segment Explorer"):
gr.Markdown("## Deep Dive Explorer")
gr.Markdown("Select a category to see a detailed mini-dashboard for that specific segment.")
with gr.Row():
drill_col = gr.Dropdown(label="Select Category Column", choices=[], interactive=True)
drill_val = gr.Dropdown(label="Select Value", choices=[], interactive=True)
drill_btn = gr.Button("πŸ”Ž Analyze Segment", variant="primary")
gr.Markdown("### Segment Overview")
# Headline stats for the selected segment.
with gr.Row():
drill_stat1 = gr.Number(label="Total Records")
drill_stat2 = gr.Number(label="% of Total Data")
drill_stat3 = gr.Number(label="Total Value (Sum of 1st Num Col)")
with gr.Row():
drill_plot1 = gr.Plot(label="Trend over Time")
drill_plot2 = gr.Plot(label="Top Associations")
# Update values
# Reuses the Compare tab's update_comp_values helper to populate drill_val.
drill_col.change(update_comp_values, drill_col, drill_val)
def run_drill_down(col, val):
    """Build the mini-dashboard for a single categorical segment (col == val).

    Returns a 5-tuple: (record count, % of all rows, sum of the first
    numeric column within the segment, trend figure or None,
    top-associations figure or None).
    """
    empty_result = (0, 0, 0, None, None)
    # Read-only access to the module-level dataset; no `global` needed.
    if current_df is None or not col or not val:
        return empty_result
    # Filter data down to the chosen segment.
    subset = current_df[current_df[col] == val]
    # Stats
    count = len(subset)
    total_rows = len(current_df)
    # Guard against an empty frame so the percentage cannot divide by zero.
    pct = (count / total_rows) * 100 if total_rows else 0
    col_types = get_column_types(current_df)
    total_val = 0
    if col_types['numerical']:
        total_val = subset[col_types['numerical'][0]].sum()
    # Plot 1: trend of the first numeric column over time, if a datetime
    # column exists.
    fig1 = None
    if col_types['datetime'] and col_types['numerical']:
        date_col = col_types['datetime'][0]
        num_col = col_types['numerical'][0]
        agg = subset.groupby(date_col)[num_col].sum().reset_index()
        fig1 = px.line(agg, x=date_col, y=num_col, title=f"{num_col} Trend for {val}")
    # Plot 2: top-10 counts of another categorical column, if one exists.
    fig2 = None
    other_cats = [c for c in col_types['categorical'] if c != col]
    if other_cats:
        cat2 = other_cats[0]
        top = subset[cat2].value_counts().head(10).reset_index()
        top.columns = [cat2, 'Count']
        fig2 = px.bar(top, x=cat2, y='Count', title=f"Top {cat2} in {val}")
    return count, round(pct, 1), round(total_val, 2), fig1, fig2
# Run the segment mini-dashboard when the user clicks "Analyze Segment".
drill_btn.click(
fn=run_drill_down,
inputs=[drill_col, drill_val],
outputs=[drill_stat1, drill_stat2, drill_stat3, drill_plot1, drill_plot2]
)
# Tab 9: Export
with gr.Tab("πŸ’Ύ Export"):
gr.Markdown("## Export Your Data")
export_data_btn = gr.Button("Export Filtered Data as CSV", variant="primary")
export_file = gr.File(label="Download CSV")
export_status = gr.Textbox(label="Export Status", interactive=False)
# export_filtered_data is defined elsewhere in this file; presumably it takes
# no inputs and returns (file, status message) — TODO confirm its signature.
export_data_btn.click(
fn=export_filtered_data,
inputs=[],
outputs=[export_file, export_status]
)
# Connect upload button to update all dropdowns
# A single upload refreshes the preview/pagination state plus every filter,
# visualization-axis, comparison, and drill-down dropdown across the tabs,
# keeping their choices in sync with the newly loaded dataset.
upload_btn.click(
fn=upload_and_preview_data,
inputs=[file_input],
outputs=[
upload_status, data_preview, row_count, preview_offset,
num_filter_dropdown, num_min, num_max,
cat_filter_dropdown, cat_filter_values,
cat_filter_dropdown_2, cat_filter_values_2,
date_filter_dropdown, date_start, date_end,
x_column_viz, y_column_viz, color_column_viz,
comp_cat_col_a, comp_cat_col_b, drill_col
]
)
return demo
if __name__ == "__main__":
demo = create_dashboard()
demo.launch(share=False, server_name="0.0.0.0", server_port=7860)