import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy.stats as stats
import base64

_SUPPORTED_EXTENSIONS = ['.csv', '.xlsx', '.xls', '.txt']


def _load_dataframe(file):
    """Read the uploaded file into a DataFrame, picking the parser by extension.

    Raises:
        ValueError: if no file was given or the extension is not supported.
    """
    if not file:
        raise ValueError("No file was uploaded. Please select a file to analyze.")

    # Dispatch on the lower-cased name so that validation and loading agree:
    # a file called "DATA.CSV" must both be accepted and actually be parsed.
    name = file.lower()
    if name.endswith('.csv'):
        return pd.read_csv(file)
    if name.endswith(('.xlsx', '.xls')):
        return pd.read_excel(file)
    if name.endswith('.txt'):
        # .txt uploads are read as tab-separated tables.
        return pd.read_csv(file, sep='\t')
    raise ValueError(f"Unsupported file type. Please upload a file with one of these extensions: {', '.join(_SUPPORTED_EXTENSIONS)}")


def _pairwise_correlations(correlation_matrix):
    """Return one correlation per unordered column pair (diagonal excluded, NaN dropped)."""
    # Walking only the strict upper triangle removes the self-correlations and
    # the mirrored (B, A) duplicates without filtering on the value itself, so
    # genuinely perfect correlations between two different columns are kept.
    rows, cols = np.triu_indices(len(correlation_matrix), k=1)
    labels = pd.MultiIndex.from_arrays(
        [correlation_matrix.index[rows], correlation_matrix.columns[cols]]
    )
    values = correlation_matrix.to_numpy()[rows, cols]
    return pd.Series(values, index=labels, dtype=float).dropna()


def _build_report(df, numeric_cols, pair_correlations):
    """Assemble the Markdown report text for the five analysis sections."""
    parts = ["# 📊 Comprehensive Data Analysis Report\n\n"]

    # 1. Basic dataset information
    parts.append("## 1. Dataset Overview\n")
    parts.append(f"- **Total Rows:** {len(df)}\n")
    parts.append(f"- **Total Columns:** {len(df.columns)}\n")
    parts.append("- **Column Types:**\n")
    parts.extend(f"  - `{col}`: {dtype}\n" for col, dtype in df.dtypes.items())

    # 2. Missing values (count and percentage per column)
    parts.append("\n## 2. Missing Value Analysis\n")
    missing_count = df.isnull().sum()
    missing_percentage = 100 * missing_count / len(df)
    missing_table = pd.concat([missing_count, missing_percentage], axis=1,
                              keys=['Missing Count', 'Missing Percentage'])
    parts.append("```\n" + missing_table.to_string() + "\n```\n")

    # 3. Descriptive statistics for numeric columns
    parts.append("\n## 3. Statistical Summary\n")
    if len(numeric_cols) > 0:
        parts.append("### Numerical Columns Statistics\n")
        parts.append("```\n" + df[numeric_cols].describe().to_string() + "\n```\n")
    else:
        parts.append("No numeric columns found.\n")

    # 4. Outliers by the 1.5 * IQR rule
    parts.append("\n## 4. Outlier Analysis\n")
    outlier_lines = []
    for col in numeric_cols:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        outside = (df[col] < q1 - 1.5 * iqr) | (df[col] > q3 + 1.5 * iqr)
        count = int(outside.sum())
        if count > 0:
            outlier_lines.append(f"- **{col}:** {count} outliers detected\n")
    parts.extend(outlier_lines or ["No outliers detected.\n"])

    # 5. Strongest positive correlations between distinct columns
    parts.append("\n## 5. Correlation Analysis\n")
    if pair_correlations is None:
        parts.append("At least two numeric columns are required for correlation analysis.\n")
    elif pair_correlations.empty:
        parts.append("No correlations could be computed.\n")
    else:
        parts.append("### Top Correlations\n")
        for (col1, col2), corr_value in pair_correlations.nlargest(5).items():
            parts.append(f"- **{col1}** & **{col2}**: {corr_value:.2f}\n")

    return "".join(parts)


def _mark_panel_empty(ax, message):
    """Replace an unused subplot with a centered explanatory message."""
    ax.text(0.5, 0.5, message, ha='center', va='center',
            transform=ax.transAxes, fontsize=12)
    ax.set_axis_off()


def _render_figure(df, numeric_cols, correlation_matrix, pair_correlations):
    """Draw the 2x2 overview figure and return the path of the saved PNG."""
    import tempfile

    fig, axs = plt.subplots(2, 2, figsize=(20, 15))
    try:
        fig.subplots_adjust(hspace=0.4, wspace=0.3)

        # Top-left: correlation heatmap
        if correlation_matrix is not None:
            sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=axs[0, 0],
                        square=True, cbar=True, linewidths=0.5)
            axs[0, 0].set_title('Correlation Heatmap', fontsize=14, fontweight='bold')
        else:
            _mark_panel_empty(axs[0, 0], 'Correlation heatmap needs at least two numeric columns')

        # Top-right: box plot of every numeric column
        if len(numeric_cols) > 0:
            df[numeric_cols].boxplot(ax=axs[0, 1])
            axs[0, 1].set_title('Box Plot of Numeric Columns', fontsize=14, fontweight='bold')
            axs[0, 1].tick_params(axis='x', rotation=45)
        else:
            _mark_panel_empty(axs[0, 1], 'No numeric columns to plot')

        # Bottom-left: value counts of the first categorical column
        categorical_cols = df.select_dtypes(include=['object']).columns
        if len(categorical_cols) > 0:
            first_categorical = categorical_cols[0]
            df[first_categorical].value_counts().plot(kind='bar', ax=axs[1, 0])
            axs[1, 0].set_title(f'Distribution of {first_categorical}',
                                fontsize=14, fontweight='bold')
            axs[1, 0].tick_params(axis='x', rotation=45)
        else:
            _mark_panel_empty(axs[1, 0], 'No categorical columns to plot')

        # Bottom-right: scatter of the single most correlated pair.
        # pandas' scatter_matrix cannot be used here: handed one Axes but asked
        # for several subplots, it clears the whole parent figure and would
        # erase the three panels drawn above.
        if pair_correlations is not None and not pair_correlations.empty:
            x_col, y_col = pair_correlations.idxmax()
            axs[1, 1].scatter(df[x_col], df[y_col], alpha=0.6)
            axs[1, 1].set_xlabel(str(x_col))
            axs[1, 1].set_ylabel(str(y_col))
            axs[1, 1].set_title('Scatter Plot of Top Correlated Features',
                                fontsize=14, fontweight='bold')
        else:
            _mark_panel_empty(axs[1, 1], 'Scatter plot needs at least two correlated numeric columns')

        fig.suptitle('🔍 Data Analysis Visualizations', fontsize=16, fontweight='bold')
        fig.tight_layout()

        # Each call writes to its own temporary file; a single fixed filename
        # would let concurrent requests overwrite one another's image.
        # The file is closed before saving so the path can be reopened on any OS,
        # and it is intentionally left on disk for the caller to serve.
        with tempfile.NamedTemporaryFile(prefix='data_analysis_advanced_',
                                         suffix='.png', delete=False) as handle:
            image_path = handle.name
        fig.savefig(image_path, dpi=300, bbox_inches='tight')
        return image_path
    finally:
        # Close only this figure (not plt.close('all')) so other in-flight
        # requests keep theirs, and so a plotting error does not leak it.
        plt.close(fig)


def advanced_analysis(file):
    """Analyze an uploaded tabular file and produce a report plus a chart image.

    Args:
        file: Path of the uploaded file (.csv, .xlsx, .xls or tab-separated
            .txt; the extension is matched case-insensitively). May be None
            when nothing was uploaded.

    Returns:
        A ``(report, image_path)`` tuple. ``report`` is Markdown text covering
        dataset overview, missing values, summary statistics, IQR outliers and
        top correlations; ``image_path`` is the path of a PNG with the 2x2
        visualization grid. On any failure the tuple is
        ``(error_report, None)`` where ``error_report`` describes the error.
    """
    try:
        df = _load_dataframe(file)
        numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

        correlation_matrix = None
        pair_correlations = None
        if len(numeric_cols) > 1:
            correlation_matrix = df[numeric_cols].corr()
            pair_correlations = _pairwise_correlations(correlation_matrix)

        report = _build_report(df, numeric_cols, pair_correlations)
        image_path = _render_figure(df, numeric_cols, correlation_matrix, pair_correlations)
        return report, image_path

    except Exception as e:
        # This is the UI boundary: every failure is turned into a readable
        # Markdown message instead of propagating to the caller.
        error_report = f"## ❌ Analysis Failed\n\n**Error:** {str(e)}\n\n"
        error_report += "Possible reasons:\n"
        error_report += "- Incorrect file format\n"
        error_report += "- Unsupported data types\n"
        error_report += "- Corrupted or incomplete dataset"
        return error_report, None

# Custom CSS handed to gr.Interface(css=...) below. It sets the page background
# and font on `.gradio-container`, styles `.output-markdown` as a white,
# rounded, scrollable card (max-height 500px), and gives `.file-upload` a green
# button look with a darker shade on hover.
# NOTE(review): `.output-markdown` and `.file-upload` are not attached to any
# component in this file — confirm they match class names Gradio itself
# renders, otherwise those two rule sets have no effect.
css = """
.gradio-container {
    background-color: #f0f2f6;
    font-family: 'Inter', 'Helvetica Neue', Arial, sans-serif;
}
.output-markdown {
    background-color: white;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    max-height: 500px;
    overflow-y: auto;
}
.file-upload {
    background-color: #4CAF50;
    color: white;
    border-radius: 5px;
    padding: 10px 20px;
    transition: background-color 0.3s ease;
}
.file-upload:hover {
    background-color: #45a049;
}
"""

# Gradio interface: a single file upload goes in; a Markdown report and the
# rendered visualization image come out of `advanced_analysis`.
demo = gr.Interface(
    fn=advanced_analysis,
    inputs=gr.File(
        type="filepath",
        label="📤 Upload File",
        file_count="single",
        # Extensions must carry the leading dot. Gradio treats dot-less entries
        # as MIME type categories (e.g. "image"), so "csv" would never match a
        # real upload.
        file_types=[".csv", ".xlsx", ".xls", ".txt"],
    ),
    outputs=[
        gr.Markdown(),
        gr.Image(label="📊 Advanced Visualizations")
    ],
    title="🧠 Smart Data Analyzer",
    description="Upload a CSV, Excel, or Text file for comprehensive data analysis, statistical insights, and interactive visualizations.",
    theme='default',
    css=css
)

# Launch the interface, listening on all network interfaces at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)