import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import gradio as gr import io from PIL import Image def eda_agent(file): try: df = pd.read_csv(file.name) except Exception as e: return f"❌ Error loading file: {e}", [] output = "" # Step 1: Shape output += f"### 🧮 Dataset Shape: {df.shape}\n\n" # Step 2: Column Types output += "### 🔤 Column Data Types:\n" + df.dtypes.to_string() + "\n\n" # Step 3: Missing Values output += "### ❓ Missing Values:\n" + df.isnull().sum().to_string() + "\n\n" # Step 4: Duplicate Rows output += f"### 🔁 Duplicate Rows: {df.duplicated().sum()}\n\n" # Step 5: Summary Statistics output += "### 📊 Summary Statistics:\n" + df.describe(include='all').to_string() + "\n\n" # Step 6: First and Last 5 Rows output += "### 🔍 First 5 Rows:\n" + df.head().to_string() + "\n\n" output += "### 🔍 Last 5 Rows:\n" + df.tail().to_string() + "\n\n" # Step 7: Unique Values per Column output += "### 🧬 Unique Values per Column:\n" for col in df.columns: output += f"- {col}: {df[col].nunique()} unique values\n" output += "\n" # Step 8: Value Counts for Categorical Columns output += "### 🏷️ Top Value Counts (for categorical features):\n" cat_cols = df.select_dtypes(include='object').columns for col in cat_cols: output += f"\n#### {col}:\n{df[col].value_counts(dropna=False).head(5).to_string()}\n" # Plots plots = [] # Step 9: Numeric Histograms numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns for col in numeric_cols[:5]: # Limit to first 5 plt.figure(figsize=(6, 4)) sns.histplot(df[col].dropna(), kde=True) plt.title(f'Distribution of {col}') buf = io.BytesIO() plt.savefig(buf, format='png') buf.seek(0) plots.append(Image.open(buf)) plt.close() # Step 10: Correlation Heatmap if len(numeric_cols) >= 2: plt.figure(figsize=(10, 6)) sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm') plt.title("Correlation Heatmap") buf = io.BytesIO() plt.savefig(buf, format='png') buf.seek(0) plots.append(Image.open(buf)) plt.close() return output, plots # Gradio interface demo = gr.Interface( fn=eda_agent, inputs=gr.File(label="📁 Upload CSV File"), outputs=[ gr.Textbox(label="📝 Full EDA Summary", lines=40), gr.Gallery(label="📊 Visualizations") ], title="📈 Auto EDA Agent", description="Upload your CSV to get full exploratory data analysis — summary, stats, and visualizations." ) demo.launch()