auto-bold-gen / data_utils.py
mmrech's picture
Deploy Gradio app with multiple files
7efdf62 verified
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import gradio as gr
def generate_initial_data(n_rows=50):
    """Generate a deterministic synthetic sales DataFrame for simulation.

    Args:
        n_rows: Number of rows to produce; one row per consecutive day
            starting at 2023-01-01.

    Returns:
        A DataFrame with the columns ``Date``, ``Region``, ``Product``,
        ``Sales``, ``Quantity``, ``Cost`` and ``Profit``, where
        ``Profit = (Sales - Cost) * Quantity``.
    """
    # A private legacy RandomState seeded with 42 yields exactly the same
    # draws as ``np.random.seed(42)`` followed by ``np.random.*`` calls, but
    # leaves the process-wide NumPy generator untouched for other code.
    rng = np.random.RandomState(42)

    first_day = datetime(2023, 1, 1)
    # The draw order (Region, Product, Sales, Quantity, Cost) determines the
    # generated values, so keep the dict entries in this order.
    data = {
        'Date': [first_day + timedelta(days=offset) for offset in range(n_rows)],
        'Region': rng.choice(['North', 'South', 'East', 'West'], n_rows),
        'Product': rng.choice(['A', 'B', 'C', 'D'], n_rows),
        'Sales': rng.randint(100, 500, n_rows),
        'Quantity': rng.randint(5, 50, n_rows),
        'Cost': rng.uniform(10, 50, n_rows).round(2),
    }
    df = pd.DataFrame(data)

    # Derived column: margin per row scaled by the quantity sold.
    df['Profit'] = (df['Sales'] - df['Cost']) * df['Quantity']
    return df
def process_data(df, selected_region, min_profit):
    """Filter the sales data and build the two summaries used for plotting.

    Args:
        df: Sales table with at least the columns ``Date``, ``Region``,
            ``Product``, ``Sales``, ``Quantity`` and ``Profit``. Anything
            that is not already a DataFrame is passed to ``pd.DataFrame``.
            ``None`` yields two empty summaries.
        selected_region: Region to keep, or ``"All"`` to keep every region.
        min_profit: Rows with ``Profit`` below this value are dropped.
            ``None`` disables the profit filter.

    Returns:
        A tuple ``(daily_summary, product_summary)``:
        ``daily_summary`` has the columns ``Day`` and ``Total Sales`` (sum of
        ``Sales`` per calendar day); ``product_summary`` has the columns
        ``Product`` and ``Quantity`` (sum of ``Quantity`` per product).
        Both keep these columns even when no rows remain.
    """
    if df is None:
        # Same column layout as the "nothing left after filtering" case so
        # consumers always see one schema.
        return (
            pd.DataFrame(columns=['Day', 'Total Sales']),
            pd.DataFrame(columns=['Product', 'Quantity']),
        )

    # The UI layer may hand over raw records instead of a DataFrame.
    if not isinstance(df, pd.DataFrame):
        df = pd.DataFrame(df)

    # Filtering
    filtered = df
    if selected_region != "All":
        filtered = filtered[filtered['Region'] == selected_region]
    if min_profit is not None:
        filtered = filtered[filtered['Profit'] >= min_profit]

    # Aggregation for Line Plot (Daily Sales Trend)
    if filtered.empty:
        daily_summary = pd.DataFrame(columns=['Day', 'Total Sales'])
    else:
        # Derive the grouping key without assigning back into the filtered
        # slice: that avoids SettingWithCopyWarning and never touches the
        # caller's frame.
        day_key = pd.to_datetime(filtered['Date']).dt.date
        daily_summary = filtered.groupby(day_key)['Sales'].sum().reset_index()
        daily_summary = daily_summary.rename(
            columns={'Date': 'Day', 'Sales': 'Total Sales'}
        )

    # Aggregation for Bar Plot (Product breakdown)
    product_summary = filtered.groupby('Product')['Quantity'].sum().reset_index()
    return daily_summary, product_summary
def handle_data_selection(df, evt: gr.SelectData):
    """Describe the row picked by a select event on the DataFrame component.

    Args:
        df: Table backing the component. Anything that is not already a
            DataFrame is passed to ``pd.DataFrame``.
        evt: Select event; ``evt.index[0]`` is used as the positional row
            index.

    Returns:
        A multi-line string listing every column of the selected row, or a
        short message when there is no data/selection or the row index is
        outside the table.
    """
    if df is None or not evt.index:
        return "No data selected."

    if not isinstance(df, pd.DataFrame):
        df = pd.DataFrame(df)

    row_index = evt.index[0]
    # Reject negative positions too: iloc would otherwise silently wrap
    # around and report a row counted from the end of the table.
    if not 0 <= row_index < len(df):
        return "Invalid row selected."

    lines = [f"Selected Row {row_index} details:\n"]
    for key, value in df.iloc[row_index].to_dict().items():
        # pd.NaT is an instance of datetime, but its strftime raises
        # ValueError; skip formatting so a missing date renders as "NaT".
        if isinstance(value, (datetime, pd.Timestamp)) and value is not pd.NaT:
            value = value.strftime('%Y-%m-%d')
        elif isinstance(value, np.generic):
            value = value.item()  # Convert numpy types to native Python types
        lines.append(f" {key}: {value}\n")
    return "".join(lines)