# Madiharehan's picture
# Update app.py
# c6edd01 verified
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import gradio as gr
import io
from PIL import Image
def _csv_source(file):
    """Return something ``pd.read_csv`` can open for the uploaded *file*.

    Plain path strings and path-like objects are passed through unchanged;
    any other object is expected to expose the on-disk path as ``.name``
    (the attribute the original implementation relied on).
    """
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        return file
    return file.name


def _build_summary(df):
    """Return the plain-text EDA report for *df* as a single string."""
    # Collect the pieces in a list and join once instead of repeated `+=`.
    # NOTE(review): the symbols after "###" look mis-encoded (probably emoji);
    # they are kept exactly as found -- confirm against the deployed file.
    parts = [
        # Step 1: Shape
        f"### ๐Ÿงฎ Dataset Shape: {df.shape}\n\n",
        # Step 2: Column Types
        "### ๐Ÿ”ค Column Data Types:\n" + df.dtypes.to_string() + "\n\n",
        # Step 3: Missing Values
        "### โ“ Missing Values:\n" + df.isnull().sum().to_string() + "\n\n",
        # Step 4: Duplicate Rows
        f"### ๐Ÿ” Duplicate Rows: {df.duplicated().sum()}\n\n",
        # Step 5: Summary Statistics
        "### ๐Ÿ“Š Summary Statistics:\n" + df.describe(include='all').to_string() + "\n\n",
        # Step 6: First and Last 5 Rows
        "### ๐Ÿ” First 5 Rows:\n" + df.head().to_string() + "\n\n",
        "### ๐Ÿ” Last 5 Rows:\n" + df.tail().to_string() + "\n\n",
        # Step 7: Unique Values per Column
        "### ๐Ÿงฌ Unique Values per Column:\n",
    ]
    parts.extend(
        f"- {col}: {df[col].nunique()} unique values\n" for col in df.columns
    )
    parts.append("\n")

    # Step 8: Value Counts for Categorical Columns (top 5, NaN counted too)
    parts.append("### ๐Ÿท๏ธ Top Value Counts (for categorical features):\n")
    for col in df.select_dtypes(include='object').columns:
        top_counts = df[col].value_counts(dropna=False).head(5).to_string()
        parts.append(f"\n#### {col}:\n{top_counts}\n")

    return "".join(parts)


def _figure_to_image(fig):
    """Save *fig* as PNG into an in-memory buffer and open it as a PIL image."""
    png = io.BytesIO()
    fig.savefig(png, format='png')
    png.seek(0)
    return Image.open(png)


def _build_plots(df):
    """Return a list of PIL images: histograms plus a correlation heatmap.

    Histograms are drawn for at most the first five int64/float64 columns;
    the heatmap is added only when there are at least two such columns.
    Every figure is closed in a ``finally`` block so a failure while drawing
    or saving does not leave open figures behind.
    """
    plots = []

    # Step 9: Numeric Histograms
    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
    for col in numeric_cols[:5]:  # Limit to first 5
        fig = plt.figure(figsize=(6, 4))
        try:
            sns.histplot(df[col].dropna(), kde=True)
            plt.title(f'Distribution of {col}')
            plots.append(_figure_to_image(fig))
        finally:
            plt.close(fig)

    # Step 10: Correlation Heatmap
    if len(numeric_cols) >= 2:
        fig = plt.figure(figsize=(10, 6))
        try:
            sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm')
            plt.title("Correlation Heatmap")
            plots.append(_figure_to_image(fig))
        finally:
            plt.close(fig)

    return plots


def eda_agent(file):
    """Run an automatic exploratory data analysis on an uploaded CSV file.

    Args:
        file: The uploaded CSV. May be ``None`` (nothing uploaded), a path
            string, a path-like object, or an object whose ``.name``
            attribute is the path of the file on disk.

    Returns:
        A ``(summary, plots)`` tuple. ``summary`` is the text report and
        ``plots`` is a list of PIL images. If the file is missing or cannot
        be parsed, ``summary`` is an error message and ``plots`` is ``[]``.
    """
    if file is None:
        # Submitting without an upload used to surface a confusing
        # "'NoneType' object has no attribute 'name'" message.
        return "โŒ Error loading file: no file was uploaded", []

    try:
        df = pd.read_csv(_csv_source(file))
    except Exception as e:
        # UI boundary: report any load/parse failure as text instead of
        # letting it propagate to the caller.
        return f"โŒ Error loading file: {e}", []

    return _build_summary(df), _build_plots(df)
# Gradio interface: one CSV upload in, a text report and an image gallery out.
_csv_upload = gr.File(label="๐Ÿ“ Upload CSV File")
_summary_box = gr.Textbox(label="๐Ÿ“ Full EDA Summary", lines=40)
_plot_gallery = gr.Gallery(label="๐Ÿ“Š Visualizations")

demo = gr.Interface(
    fn=eda_agent,
    inputs=_csv_upload,
    outputs=[_summary_box, _plot_gallery],
    title="๐Ÿ“ˆ Auto EDA Agent",
    description="Upload your CSV to get full exploratory data analysis โ€” summary, stats, and visualizations.",
)

# Start the web server for the interface.
demo.launch()