Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import gradio as gr | |
| import io | |
| from PIL import Image | |
# Upper bound on per-column histograms so very wide datasets stay responsive.
_MAX_HISTOGRAMS = 5


def _resolve_csv_path(file):
    """Return the value to hand to ``pd.read_csv`` for an uploaded file.

    A plain path (``str`` or any object implementing ``__fspath__``) is
    returned unchanged; any other object is expected to expose the path of
    the uploaded file through a ``name`` attribute.
    """
    # Checked before `.name` on purpose: path objects such as pathlib.Path
    # also have a `.name`, but there it is only the final path component.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        return file
    return file.name


def _build_summary(df):
    """Assemble the text report (shape, dtypes, missing values, ...) for *df*."""
    parts = [
        f"### ๐งฎ Dataset Shape: {df.shape}\n\n",
        "### ๐ค Column Data Types:\n", df.dtypes.to_string(), "\n\n",
        "### โ Missing Values:\n", df.isnull().sum().to_string(), "\n\n",
        f"### ๐ Duplicate Rows: {df.duplicated().sum()}\n\n",
        "### ๐ Summary Statistics:\n",
        df.describe(include='all').to_string(),
        "\n\n",
        "### ๐ First 5 Rows:\n", df.head().to_string(), "\n\n",
        "### ๐ Last 5 Rows:\n", df.tail().to_string(), "\n\n",
        "### ๐งฌ Unique Values per Column:\n",
    ]
    parts.extend(
        f"- {col}: {df[col].nunique()} unique values\n" for col in df.columns
    )
    parts.append("\n")

    parts.append("### ๐ท๏ธ Top Value Counts (for categorical features):\n")
    for col in df.select_dtypes(include='object').columns:
        # dropna=False so missing values are counted as their own category.
        top_counts = df[col].value_counts(dropna=False).head(5).to_string()
        parts.append(f"\n#### {col}:\n{top_counts}\n")

    # Collect pieces and join once instead of repeated `+=` on a string.
    return "".join(parts)


def _figure_to_image(fig):
    """Render a Matplotlib figure to an in-memory PNG and open it with PIL."""
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    return Image.open(buf)


def _build_plots(df):
    """Return histogram images plus, when possible, a correlation heatmap."""
    plots = []
    # 'number' covers every numeric dtype, not only int64/float64.
    numeric_cols = df.select_dtypes(include='number').columns

    for col in numeric_cols[:_MAX_HISTOGRAMS]:
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            sns.histplot(df[col].dropna(), kde=True, ax=ax)
            ax.set_title(f'Distribution of {col}')
            plots.append(_figure_to_image(fig))
        finally:
            # Close even when plotting/saving fails so figures do not pile up.
            plt.close(fig)

    # A correlation matrix needs at least two numeric columns.
    if len(numeric_cols) >= 2:
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.heatmap(
                df[numeric_cols].corr(), annot=True, cmap='coolwarm', ax=ax
            )
            ax.set_title("Correlation Heatmap")
            plots.append(_figure_to_image(fig))
        finally:
            plt.close(fig)

    return plots


def eda_agent(file):
    """Run an automatic exploratory data analysis on an uploaded CSV file.

    Args:
        file: The uploaded CSV. Either a filesystem path (``str`` or
            path-like object) or an object whose ``name`` attribute holds
            the path. ``None`` (nothing uploaded) is reported as an error.

    Returns:
        A ``(summary, plots)`` tuple. ``summary`` is the text report and
        ``plots`` is a list of PIL images: one histogram for each of the
        first ``_MAX_HISTOGRAMS`` numeric columns, followed by a correlation
        heatmap when at least two numeric columns exist. If the file cannot
        be loaded, ``summary`` is an error message and ``plots`` is empty.
    """
    if file is None:
        return "โ Error loading file: no file was uploaded", []

    try:
        df = pd.read_csv(_resolve_csv_path(file))
    except Exception as e:
        # UI boundary: any load failure is shown to the user, not raised.
        return f"โ Error loading file: {e}", []

    # The summary is built first, matching the original evaluation order.
    return _build_summary(df), _build_plots(df)
# Gradio interface: one CSV upload in, a text report and an image gallery out.
# Components are created in the same order as they are wired below.
csv_upload = gr.File(label="๐ Upload CSV File")
summary_box = gr.Textbox(label="๐ Full EDA Summary", lines=40)
plot_gallery = gr.Gallery(label="๐ Visualizations")

demo = gr.Interface(
    fn=eda_agent,
    inputs=csv_upload,
    outputs=[summary_box, plot_gallery],
    title="๐ Auto EDA Agent",
    description="Upload your CSV to get full exploratory data analysis โ summary, stats, and visualizations.",
)

# Start the web app as soon as this module is executed.
demo.launch()