# Spaces: mic3333 / dash (Sleeping)
# File size: 12,328 Bytes
# ac0793a 1d76c68 ac0793a

import gradio as gr
import plotly.express as px
import pandas as pd
import io

# In-memory registry of DataFrames keyed by display name (acts as our "database").
# The Gapminder CSV is fetched once at import time and registered as the
# default entry; `default_df` is also used to seed the UI state.
default_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
datasets = {'Gapminder': default_df}

# Function to load different built-in datasets
def load_builtin_dataset(dataset_name):
    """Load one of the built-in datasets and register it in ``datasets``.

    Args:
        dataset_name: One of "Gapminder", "Iris", "Tips", "Stock Data" or
            "Wind Data".

    Returns:
        A ``(df, message)`` tuple. On success ``df`` is the loaded DataFrame
        and ``message`` reports its row/column counts. For an unknown name or
        a loading failure ``df`` is ``None`` and ``message`` describes the
        problem.
    """
    def _load_stocks_long():
        # Reshape from wide to long format for better analysis
        stocks = px.data.stocks()
        stocks = stocks.melt(id_vars='date', var_name='company', value_name='stock_price')
        stocks['date'] = pd.to_datetime(stocks['date'])
        return stocks

    # Each loader is a callable so nothing is fetched until a name is chosen.
    loaders = {
        "Gapminder": lambda: pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'),
        "Iris": lambda: px.data.iris(),
        "Tips": lambda: px.data.tips(),
        "Stock Data": _load_stocks_long,
        "Wind Data": lambda: px.data.wind(),
    }

    loader = loaders.get(dataset_name)
    if loader is None:
        # Always hand back a well-formed pair: this handler feeds two outputs,
        # so a bare None for an unrecognised name could not be unpacked.
        return None, f"❌ Unknown dataset: {dataset_name}"

    try:
        df = loader()
    except Exception as e:
        return None, f"❌ Error loading {dataset_name}: {str(e)}"

    datasets[dataset_name] = df
    return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"

# Function to handle file uploads
def upload_dataset(file, custom_name):
    """Read an uploaded CSV/Excel file and register it in ``datasets``.

    Args:
        file: The uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.
        custom_name: Optional display name. Blank or whitespace-only values
            fall back to the uploaded file's stem.

    Returns:
        A ``(df, message, selector_update)`` tuple. On success ``df`` is the
        parsed DataFrame and ``selector_update`` selects the new dataset; on
        failure ``df`` is ``None`` and ``message`` explains why.
    """
    from pathlib import Path

    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

    try:
        path = Path(getattr(file, "name", file))

        # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
        lowered = path.name.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

        # Path.stem keeps inner dots ("my.data.csv" -> "my.data") and copes
        # with the platform's own path separator.
        dataset_name = (custom_name or "").strip() or path.stem
        datasets[dataset_name] = df

        return df, f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns", gr.update(choices=list(datasets.keys()), value=dataset_name)

    except Exception as e:
        return None, f"❌ Error reading file: {str(e)}", gr.update()

# Function to switch between datasets
def switch_dataset(dataset_name):
    """Activate a registered dataset and refresh the widgets that depend on it.

    Returns a 7-tuple: the first ten rows for the preview, a Markdown summary,
    updates for the X-axis, Y-axis, colour and size dropdowns, and the full
    DataFrame to keep as the current state. For a name that is not registered
    the preview and state are ``None``, the summary is "Dataset not found" and
    the dropdown updates are no-ops.
    """
    if dataset_name not in datasets:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

    frame = datasets[dataset_name]
    every_col = frame.columns.tolist()
    number_cols = frame.select_dtypes(include=['number']).columns.tolist()
    label_cols = frame.select_dtypes(include=['object', 'category']).columns.tolist()

    # Only the first five names of each kind are listed; "..." marks truncation.
    number_summary = ', '.join(number_cols[:5]) + ('...' if len(number_cols) > 5 else '')
    label_summary = ', '.join(label_cols[:5]) + ('...' if len(label_cols) > 5 else '')

    info = f"""
        ### Dataset: {dataset_name}
        - **Rows**: {len(frame)}
        - **Columns**: {len(frame.columns)}
        - **Numeric columns**: {number_summary}
        - **Categorical columns**: {label_summary}
        """

    # Default selections: first column on X, first numeric column on Y,
    # and the empty choice for the optional colour/size encodings.
    x_update = gr.update(choices=every_col, value=every_col[0] if every_col else None)
    y_update = gr.update(choices=number_cols, value=number_cols[0] if number_cols else None)
    color_update = gr.update(choices=[""] + label_cols, value="")
    size_update = gr.update(choices=[""] + number_cols, value="")

    return frame.head(10), info, x_update, y_update, color_update, size_update, frame

# Dynamic plotting function
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Build a Plotly figure for the current dataset and widget selections.

    Args:
        df: DataFrame to plot, or ``None`` when no dataset is active.
        plot_type: "Scatter", "Line", "Bar", "Histogram", "Box" or "Heatmap".
        x_col: Column for the X axis. Not used by "Heatmap".
        y_col: Column for the Y axis. Not used by "Histogram" or "Heatmap".
        color_col: Optional colour column; ``""`` or ``None`` disables it.
        size_col: Optional marker-size column (scatter only); ``""`` or
            ``None`` disables it.

    Returns:
        The figure with its height set to 500, or ``None`` when there is
        nothing to draw (no data, no X column, fewer than two numeric columns
        for the heatmap, unsupported plot type, or a plotting error).
    """
    if df is None:
        return None

    try:
        if plot_type == "Heatmap":
            # The correlation matrix only needs the numeric columns, so the
            # heatmap must not depend on an X-axis selection.
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) < 2:
                return None
            fig = px.imshow(numeric_df.corr(), text_auto=True, title="Correlation Heatmap")

        elif not x_col:
            # Every remaining plot type needs an X column.
            return None

        else:
            # The dropdowns use "" for "no selection"; Plotly expects None.
            color_col = color_col or None
            size_col = size_col or None

            if plot_type == "Scatter":
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                                 title=f"Scatter: {x_col} vs {y_col}")
            elif plot_type == "Line":
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"Line: {x_col} vs {y_col}")
            elif plot_type == "Bar":
                # color=None is the same as leaving the argument out.
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"Bar: {x_col} vs {y_col}")
            elif plot_type == "Histogram":
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"Histogram of {x_col}")
            elif plot_type == "Box":
                fig = px.box(df, x=x_col, y=y_col, color=color_col,
                             title=f"Box plot: {x_col} vs {y_col}")
            else:
                # Report unsupported types explicitly rather than relying on
                # an unbound `fig` being caught by the handler below.
                print(f"Plot error: unsupported plot type {plot_type!r}")
                return None

        fig.update_layout(height=500)
        return fig

    except Exception as e:
        # Best effort: a bad column combination should clear the plot, not
        # crash the UI callback.
        print(f"Plot error: {e}")
        return None

# Create the Gradio interface
# Layout of the app: three tabs (dataset management, visualization, analysis)
# sharing one per-session DataFrame held in `current_df`.
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📊 Dynamic Dataset Explorer
    Upload your own data or explore built-in datasets with automatic visualization
    """)
    
    # Hidden state to store current dataframe
    # (seeded with the default dataset; replaced by switch_dataset's last output)
    current_df = gr.State(value=default_df)
    
    with gr.Tabs():
        # Tab 1: Dataset Management
        with gr.TabItem("📁 Dataset Management"):
            with gr.Row():
                # Left column: controls for loading, uploading and switching datasets
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    # These choices must match the names handled by load_builtin_dataset
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset"
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")
                    
                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")
                    
                    gr.Markdown("### Active Datasets")
                    # Lists the keys of the `datasets` registry as of build time;
                    # handlers refresh the choices after loads and uploads
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset"
                    )
                
                # Right column: status line, dataset summary and row preview
                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")
        
        # Tab 2: Dynamic Visualization
        with gr.TabItem("📈 Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    # These choices must match the plot types handled by create_plot
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type"
                    )
                    
                    # Column pickers start empty; switch_dataset fills their
                    # choices on page load and whenever the dataset changes
                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)
                    
                    plot_btn = gr.Button("Create Plot", variant="primary")
                
                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")
        
        # Tab 3: Data Analysis
        with gr.TabItem("🔍 Data Analysis"):
            with gr.Row():
                with gr.Column():
                    # These choices must match the branches of analyze_data
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")
                
                with gr.Column():
                    # Receives the Markdown report returned by analyze_data
                    analysis_output = gr.Markdown()
            
            def analyze_data(df, analysis_type):
                """Return a Markdown report about ``df`` for the chosen analysis.

                Args:
                    df: The active DataFrame, or ``None`` when nothing is loaded.
                    analysis_type: "Summary Statistics", "Missing Values",
                        "Data Types" or "Unique Values".

                Returns:
                    A fenced code block containing the resulting table, or a
                    short plain-text message when there is nothing to tabulate
                    (no dataset, no columns to describe, no missing values, or
                    an unrecognised analysis type).
                """
                if df is None:
                    return "No dataset loaded"

                if analysis_type == "Summary Statistics":
                    if len(df.columns) == 0:
                        # describe() raises ValueError on a frame without columns.
                        return "Dataset has no columns to summarize"
                    report = df.describe().to_string()
                elif analysis_type == "Missing Values":
                    missing = df.isnull().sum()
                    missing = missing[missing > 0]
                    if missing.empty:
                        return "No missing values!"
                    report = missing.to_string()
                elif analysis_type == "Data Types":
                    report = df.dtypes.to_string()
                elif analysis_type == "Unique Values":
                    report = df.nunique().to_string()
                else:
                    # Give the output component text instead of an implicit None.
                    return f"Unknown analysis type: {analysis_type}"

                return f"```\n{report}\n```"
    
    # Event handlers
    def _activate_loaded_dataset(dataset_name):
        """Refresh the selector's choices and select ``dataset_name`` if it is registered."""
        choices = list(datasets.keys())
        if dataset_name in datasets:
            # Selecting the entry fires dataset_selector.change, which is what
            # updates the preview, axis choices and current_df together.
            return gr.update(choices=choices, value=dataset_name)
        # Loading failed: only refresh the list, keep the current selection.
        return gr.update(choices=choices)

    # Load a built-in dataset, then make it the active one (same outcome as
    # the upload path, which also selects the dataset it just registered).
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg]
    ).then(
        _activate_loaded_dataset,
        inputs=[builtin_choice],
        outputs=[dataset_selector]
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector]
    )

    # When dataset is switched, update everything
    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=[data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    )

    # Create plot based on selections
    plot_btn.click(
        create_plot,
        inputs=[current_df, plot_type, x_axis, y_axis, color_by, size_by],
        outputs=[plot_output]
    )

    # Auto-update plot when parameters change
    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
        component.change(
            create_plot,
            inputs=[current_df, plot_type, x_axis, y_axis, color_by, size_by],
            outputs=[plot_output]
        )

    # Analysis
    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output]
    )

    # Load initial dataset
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=[data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    )

if __name__ == "__main__":
    # Start the app only when run as a script: no share link, debug mode on.
    launch_options = {"share": False, "debug": True}
    demo.launch(**launch_options)