"""Gradio app: upload a CSV, view descriptive statistics, plot a column."""

import io

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

DROPDOWN_LABEL = "Select Column to Plot"


def _column_dropdown(choices):
    """Build the dropdown update offering `choices`, with the selection reset.

    The value is reset so that a column picked for a previously uploaded file
    cannot remain selected after the data changes.
    """
    return gr.Dropdown(choices=choices, value=None, label=DROPDOWN_LABEL)


# --- 1. Function to process the uploaded file and generate stats/dataframe ---
def analyze_csv(file):
    """Read an uploaded CSV and prepare the values for the output components.

    Args:
        file: Value supplied by the `gr.File` input. Handled forms are `None`
            (nothing uploaded), a path string, or an object exposing the
            path as `.name`.

    Returns:
        A 4-tuple matching the wired outputs, in order:
        the DataFrame for the preview table, the statistics as a Markdown
        string (or a prompt / error message), the dropdown update listing the
        numeric columns, and the DataFrame again for the state component.
        On missing input or on any processing error the DataFrame slots are
        `None` and the dropdown has no choices.
    """
    if file is None:
        # Nothing to analyze: reset every output.
        return None, "Please upload a CSV file.", _column_dropdown([]), None

    # Accept both a plain path and a tempfile-like wrapper; which one arrives
    # depends on how the File component is configured.
    csv_path = getattr(file, "name", file)

    try:
        df = pd.read_csv(csv_path)

        # Transposed so that each described column becomes one table row.
        stats = df.describe().round(2).T
        stats_markdown = stats.to_markdown()

        # Only numeric columns are offered for plotting.
        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

        return df, stats_markdown, _column_dropdown(numeric_cols), df
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        error_message = f"Error processing file: {e}"
        return None, error_message, _column_dropdown([]), None


# --- 2. Function to generate a plot for a selected column ---
def generate_plot(df_state, column_name):
    """Draw a histogram with a KDE overlay for one column of the stored frame.

    Args:
        df_state: DataFrame held in the state component, or `None` when no
            file has been loaded.
        column_name: Name of the column to plot, as chosen in the dropdown.

    Returns:
        The Matplotlib figure to display, or `None` when there is nothing to
        plot (no data, no selection, unknown column) or plotting failed.
    """
    if df_state is None or not column_name:
        return None
    if column_name not in df_state.columns:
        # Selection does not belong to the currently loaded data.
        return None

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        sns.histplot(df_state[column_name], kde=True, ax=ax)
        ax.set_title(f'Distribution of {column_name}')
        ax.set_xlabel(column_name)
        ax.set_ylabel('Frequency')
    except Exception as e:
        print(f"Plotting Error: {e}")
        return None
    finally:
        # Unregister the figure from pyplot on every path so repeated clicks
        # do not accumulate open figures; the Figure object itself stays
        # usable by the caller.
        plt.close(fig)

    return fig


# --- 3. Gradio Interface Definition ---
with gr.Blocks(title="CSV Data Analyzer") as demo:
    gr.Markdown("## 📈 CSV Data Analyzer")
    gr.Markdown("Upload your CSV file and see instant statistics and visualizations.")

    # State component that carries the DataFrame from `analyze_csv` to
    # `generate_plot` between events.
    df_state = gr.State(None)

    # Input components
    csv_file = gr.File(label="Upload CSV File (.csv)", file_types=[".csv"])

    with gr.Row():
        # Outputs. No `height` argument is passed to the Dataframe: an earlier
        # revision note marks it as unsupported.
        df_output = gr.Dataframe(label="Uploaded Data Preview", interactive=False)
        stats_output = gr.Markdown(label="Descriptive Statistics")

    # NOTE(review): the literal in the source contained only a line break;
    # presumably separator markup was lost when the file was pasted - confirm.
    gr.HTML("\n")

    # Plotting section
    with gr.Row():
        # Choices are filled in by `analyze_csv` once a file is loaded.
        column_dropdown = gr.Dropdown(label=DROPDOWN_LABEL, interactive=True)
        plot_button = gr.Button("Generate Plot")

    plot_output = gr.Plot(label="Column Distribution Plot")

    # --- 4. Event Handling (Interactions) ---

    # `change` is used so the handler runs both when a file is uploaded and
    # when it is cleared; the cleared case reaches the `file is None` branch.
    csv_file.change(
        analyze_csv,
        inputs=[csv_file],
        outputs=[df_output, stats_output, column_dropdown, df_state],
    )

    # Triggered when the plot button is clicked
    plot_button.click(
        generate_plot,
        inputs=[df_state, column_dropdown],
        outputs=[plot_output],
    )


# Launch the app for the Hugging Face Space environment
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)