File size: 4,127 Bytes
5b42f96
 
 
 
 
 
 
 
433c2fa
5b42f96
433c2fa
 
5b42f96
 
 
 
 
 
 
 
433c2fa
5b42f96
 
 
 
 
433c2fa
5b42f96
 
 
 
433c2fa
5b42f96
 
 
 
 
433c2fa
5b42f96
 
 
 
 
 
 
433c2fa
5b42f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433c2fa
5b42f96
 
 
 
 
 
 
433c2fa
 
5b42f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433c2fa
5b42f96
 
 
433c2fa
5b42f96
 
 
433c2fa
5b42f96
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io

# --- 1. Function to process the uploaded file and generate stats/dataframe ---
def analyze_csv(file):
    """Read an uploaded CSV and build the values for the analysis widgets.

    Args:
        file: Value delivered by the file-upload input. ``None`` means no
            file is present; otherwise it is either a filesystem path or an
            object whose ``name`` attribute holds the path of the upload.

    Returns:
        A 4-tuple in the order the event's outputs are wired:
        ``(preview_dataframe, statistics_text, dropdown_update, state_dataframe)``.
        When no file is given or the file cannot be processed, the dataframe
        slots are ``None``, the text slot carries a prompt or the error
        message, and the dropdown is emptied.
    """

    def column_dropdown(choices):
        # `value=None` drops any selection left over from a previous file so
        # the plot handler is never asked for a column that no longer exists.
        return gr.Dropdown(
            choices=choices, value=None, label="Select Column to Plot"
        )

    if file is None:
        return None, "Please upload a CSV file.", column_dropdown([]), None

    try:
        # Accept both a plain path and a file wrapper exposing `.name`.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        # One row per described column, rounded for display.
        stats = df.describe().round(2).T

        try:
            stats_markdown = stats.to_markdown()
        except ImportError:
            # `to_markdown` needs the optional `tabulate` package; fall back
            # to a fixed-width table inside a code fence instead of failing.
            stats_markdown = f"```\n{stats.to_string()}\n```"

        # Only numeric columns are offered for plotting.
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    except Exception as e:
        # UI boundary: report the problem in the statistics panel and clear
        # every other component rather than crashing the event handler.
        return None, f"Error processing file: {e}", column_dropdown([]), None

    return df, stats_markdown, column_dropdown(numeric_cols), df


# --- 2. Function to generate a plot for a selected column ---
def generate_plot(df_state, column_name):
    """Build a histogram (with KDE overlay) for one column of the dataframe.

    Args:
        df_state: The dataframe stored by the upload handler, or ``None`` if
            no file has been analysed yet.
        column_name: Name of the column to plot; ``None`` or ``""`` means
            nothing is selected.

    Returns:
        The Matplotlib ``Figure`` to display, or ``None`` when there is
        nothing to plot or plotting failed. A figure object is returned
        (rather than encoded PNG bytes) because the handler's output is a
        plot component, which renders figure objects.
    """
    if df_state is None or column_name is None or column_name == "":
        return None

    fig = None
    try:
        # Draw on an explicit figure/axes pair instead of pyplot's implicit
        # "current figure", which is shared global state.
        fig, ax = plt.subplots(figsize=(8, 5))

        sns.histplot(df_state[column_name], kde=True, ax=ax)

        ax.set_title(f'Distribution of {column_name}')
        ax.set_xlabel(column_name)
        ax.set_ylabel('Frequency')

        return fig

    except Exception as e:
        print(f"Plotting Error: {e}")
        return None

    finally:
        # Deregister the figure from pyplot on both success and failure so
        # repeated clicks do not accumulate open figures. The Figure object
        # itself remains usable by the caller after being closed here.
        if fig is not None:
            plt.close(fig)


# --- 3. Gradio Interface Definition ---
with gr.Blocks(title="CSV Data Analyzer") as demo:
    gr.Markdown("## 📈 CSV Data Analyzer")
    gr.Markdown("Upload your CSV file and see instant statistics and visualizations.")

    # Holds the parsed DataFrame between events so that `generate_plot` can
    # reuse what `analyze_csv` loaded without re-reading the file.
    df_state = gr.State(None)

    # Input components
    csv_file = gr.File(label="Upload CSV File (.csv)", file_types=[".csv"])

    with gr.Row():
        # Analysis outputs: data preview next to the statistics table
        df_output = gr.Dataframe(label="Uploaded Data Preview", interactive=False)
        stats_output = gr.Markdown(label="Descriptive Statistics")

    gr.HTML("<hr>")

    # Plotting section
    with gr.Row():
        # Choices are filled in by `analyze_csv` once a file is loaded
        column_dropdown = gr.Dropdown(label="Select Column to Plot", interactive=True)
        plot_button = gr.Button("Generate Plot")

    plot_output = gr.Plot(label="Column Distribution Plot")


    # --- 4. Event Handling (Interactions) ---

    # Components refreshed by `analyze_csv`, in the order of its return tuple
    analysis_outputs = [df_output, stats_output, column_dropdown, df_state]

    # Triggered when a file is uploaded
    csv_file.upload(
        analyze_csv,
        inputs=[csv_file],
        outputs=analysis_outputs
    )

    # Triggered when the file is removed. `upload` does not fire on removal,
    # so without these handlers the preview, statistics, dropdown, stored
    # DataFrame and plot would keep showing the previous file's data.
    csv_file.clear(
        analyze_csv,
        inputs=[csv_file],
        outputs=analysis_outputs
    )
    csv_file.clear(
        lambda: None,
        outputs=[plot_output]
    )

    # Triggered when the plot button is clicked
    plot_button.click(
        generate_plot,
        inputs=[df_state, column_dropdown],
        outputs=[plot_output]
    )


# Entry point: start the server for the Hugging Face Space environment
if __name__ == "__main__":
    # Listen on all interfaces, on port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)