"""
Enhanced filtering and visualization example for UAP Data Analysis Tool
Demonstrates the improved dynamic filtering and visualization pipeline
"""

import streamlit as st
import pandas as pd
import numpy as np
from data_processing import DataProcessor
from visualization import UAP_Visualizer
from session_manager import SessionStateManager
import plotly.graph_objects as go

def main():
    """Run the Streamlit demo for enhanced filtering and visualization.

    Renders the page title, initializes session state through
    ``SessionStateManager``, loads the dataset (falling back to generated
    sample data when loading fails), draws the sidebar options, and calls
    the ``show_*`` view that matches the selected analysis mode.
    """

    st.title("🚀 Enhanced UAP Data Analysis Pipeline")
    st.markdown("### Dynamic Filtering & Interactive Visualization System")

    # Initialize session state
    SessionStateManager.initialize()

    # Load data with caching
    @st.cache_data
    def load_sample_data():
        """Return the UAP dataset, or generated sample data if loading fails.

        Any exception raised while loading ``final_ufoseti_dataset.h5`` is
        reported with ``st.warning`` and replaced by a 10,000-row synthetic
        frame built from a fixed seed, so the demo stays usable without the
        real file.
        """
        try:
            # Try to load the actual UAP dataset
            df = DataProcessor.load_data('final_ufoseti_dataset.h5', key='df')
            st.success(f"✅ Loaded UAP dataset with {len(df):,} records")
            return df
        except Exception as e:
            st.warning(f"Could not load UAP dataset: {e}")
            # Create sample data if real data not available
            np.random.seed(42)  # fixed seed keeps the sample data reproducible
            n_samples = 10000

            sample_data = {
                # Lowercase 'h' is the hourly alias; uppercase 'H' is
                # deprecated since pandas 2.2, and a failure here would
                # escape the fallback path entirely.
                'date': pd.date_range('2020-01-01', periods=n_samples, freq='h'),
                'latitude': np.random.uniform(-90, 90, n_samples),
                'longitude': np.random.uniform(-180, 180, n_samples),
                'duration_minutes': np.random.exponential(10, n_samples),
                'shape': np.random.choice(['circle', 'triangle', 'disk', 'light', 'other'], n_samples),
                'color': np.random.choice(['white', 'red', 'orange', 'blue', 'green', 'unknown'], n_samples),
                'altitude': np.random.uniform(100, 50000, n_samples),
                'witnesses': np.random.poisson(2, n_samples) + 1,
                'credibility_score': np.random.beta(2, 5, n_samples),
                'description_length': np.random.lognormal(3, 1, n_samples).astype(int)
            }

            df = pd.DataFrame(sample_data)
            st.info(f"📊 Using sample dataset with {len(df):,} records for demonstration")
            return df

    # Load the data
    df = load_sample_data()

    # Sidebar for analysis options
    with st.sidebar:
        st.header("🔧 Analysis Options")

        analysis_mode = st.radio(
            "Select Analysis Mode",
            ["Enhanced Filtering", "Interactive Visualizations", "Dashboard View", "Performance Demo"]
        )

        enable_quick_filters = st.checkbox("Enable Quick Filters", value=False)
        enable_advanced_filters = st.checkbox("Enable Advanced Filters", value=True)
        max_viz_points = st.slider("Max Visualization Points", 1000, 50000, 10000, step=1000)

    # Main content based on selected mode
    if analysis_mode == "Enhanced Filtering":
        show_enhanced_filtering(df, enable_quick_filters, enable_advanced_filters)

    elif analysis_mode == "Interactive Visualizations":
        show_interactive_visualizations(df, max_viz_points)

    elif analysis_mode == "Dashboard View":
        show_dashboard_view(df, max_viz_points)

    elif analysis_mode == "Performance Demo":
        show_performance_demo(df)

def show_enhanced_filtering(df: pd.DataFrame, enable_quick_filters: bool, enable_advanced_filters: bool):
    """Render the data profile, the filter controls and the filtered result.

    Args:
        df: Data to profile and filter.
        enable_quick_filters: Forwarded to
            ``DataProcessor.filter_dataframe_enhanced``.
        enable_advanced_filters: Forwarded to
            ``DataProcessor.filter_dataframe_enhanced``.

    When the filtered frame has no rows a warning is shown instead of the
    preview and export controls, so an over-restrictive filter does not leave
    the page blank.
    """

    st.header("🔍 Enhanced Dynamic Filtering")

    # Show data profile first
    with st.expander("📊 Data Profile Analysis", expanded=True):
        profile = DataProcessor.profile_data(df)

        # One metric per column group reported by the profiler.
        profile_metrics = (
            ("Categorical Columns", 'categorical_columns'),
            ("Numeric Columns", 'numeric_columns'),
            ("DateTime Columns", 'datetime_columns'),
            ("Text Columns", 'text_columns'),
        )
        for column, (label, profile_key) in zip(st.columns(4), profile_metrics):
            with column:
                st.metric(label, len(profile[profile_key]))

        # Show column details
        if st.checkbox("Show detailed column analysis"):
            st.json(profile)

    # Apply enhanced filtering
    st.subheader("Apply Filters")
    filtered_df = DataProcessor.filter_dataframe_enhanced(
        df,
        enable_quick_filters=enable_quick_filters,
        enable_advanced_filters=enable_advanced_filters
    )

    # Tell the user why nothing is displayed rather than rendering a blank page.
    if len(filtered_df) == 0:
        st.warning("No records match the current filters. Adjust or clear the filters to see data.")
        return

    # Show filtered results (preview is capped at the first 100 rows)
    st.subheader("📋 Filtered Data Preview")
    st.dataframe(filtered_df.head(100), use_container_width=True)

    # Export options
    save_col, download_col = st.columns(2)
    with save_col:
        if st.button("💾 Save Filtered Data"):
            # In a real app, you'd save to a file
            SessionStateManager.set('last_filtered_data', filtered_df)
            st.success("Filtered data saved to session!")

    with download_col:
        # The download contains the full filtered frame, not just the preview.
        csv = filtered_df.to_csv(index=False)
        st.download_button(
            label="📥 Download CSV",
            data=csv,
            file_name="filtered_uap_data.csv",
            mime="text/csv"
        )

def show_interactive_visualizations(df: pd.DataFrame, max_points: int):
    """Render the chart picker and build the chart the user selects.

    Columns are grouped by dtype (numeric, object/category, datetime) and
    offered as options for the selected chart type. Each chart is only
    generated after its button is pressed. If the data lacks the columns a
    chart type needs, a warning is shown instead of silently rendering
    nothing.

    Args:
        df: Data to visualize.
        max_points: Forwarded to ``UAP_Visualizer.plot_interactive_scatter``
            as ``max_points``.
    """

    st.header("📊 Interactive Visualizations")

    # Get numeric, categorical and datetime columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    datetime_cols = [
        col for col in df.columns
        if pd.api.types.is_datetime64_any_dtype(df[col])
    ]

    # Visualization type selection
    viz_type = st.selectbox(
        "Select Visualization Type",
        ["Scatter Plot", "Histogram", "Treemap", "Correlation Matrix", "Time Series"]
    )

    if viz_type == "Scatter Plot":
        if len(numeric_cols) < 2:
            st.warning("Scatter Plot needs at least two numeric columns.")
            return

        col1, col2, col3 = st.columns(3)

        with col1:
            x_col = st.selectbox("X-axis", numeric_cols, key="scatter_x")
        with col2:
            # Exclude the X column so a column is never plotted against itself.
            y_col = st.selectbox("Y-axis", [col for col in numeric_cols if col != x_col], key="scatter_y")
        with col3:
            color_col = st.selectbox("Color by", ["None"] + categorical_cols, key="scatter_color")
            color_col = None if color_col == "None" else color_col

        if st.button("Generate Scatter Plot"):
            fig = UAP_Visualizer.plot_interactive_scatter(
                df, x_col, y_col, color_col=color_col, max_points=max_points
            )
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Histogram":
        if not numeric_cols:
            st.warning("Histogram needs at least one numeric column.")
            return

        col1, col2 = st.columns(2)

        with col1:
            hist_col = st.selectbox("Column to analyze", numeric_cols + categorical_cols, key="hist_col")
        with col2:
            bins = st.slider("Number of bins", 10, 100, 50, key="hist_bins")

        if st.button("Generate Histogram"):
            if hist_col in numeric_cols:
                fig = UAP_Visualizer.plot_interactive_histogram(df, hist_col, bins=bins)
            else:
                # For categorical columns, use treemap instead
                fig = UAP_Visualizer.plot_interactive_treemap(df, hist_col, top_n=20)
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Treemap":
        if not categorical_cols:
            st.warning("Treemap needs at least one categorical column.")
            return

        col1, col2 = st.columns(2)

        with col1:
            tree_col = st.selectbox("Categorical column", categorical_cols, key="tree_col")
        with col2:
            top_n = st.slider("Top N categories", 5, 50, 20, key="tree_n")

        if st.button("Generate Treemap"):
            fig = UAP_Visualizer.plot_interactive_treemap(df, tree_col, top_n=top_n)
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Correlation Matrix":
        if len(numeric_cols) < 2:
            st.warning("Correlation Matrix needs at least two numeric columns.")
            return

        col1, col2 = st.columns(2)

        with col1:
            corr_method = st.selectbox("Correlation method", ["pearson", "spearman", "kendall"], key="corr_method")
        with col2:
            # Pre-select at most the first ten numeric columns.
            selected_cols = st.multiselect("Select columns", numeric_cols, default=numeric_cols[:10], key="corr_cols")

        if selected_cols and st.button("Generate Correlation Matrix"):
            fig = UAP_Visualizer.plot_correlation_matrix(df[selected_cols], method=corr_method)
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Time Series":
        if not datetime_cols or not numeric_cols:
            st.warning("Time Series needs at least one datetime column and one numeric column.")
            return

        col1, col2, col3 = st.columns(3)

        with col1:
            date_col = st.selectbox("Date column", datetime_cols, key="ts_date")
        with col2:
            value_cols = st.multiselect("Value columns", numeric_cols, default=numeric_cols[:3], key="ts_values")
        with col3:
            resample_freq = st.selectbox("Resample frequency", ["None", "D", "W", "M"], key="ts_freq")
            # "None" in the picker means: pass no resample frequency.
            resample_freq = None if resample_freq == "None" else resample_freq

        if value_cols and st.button("Generate Time Series"):
            fig = UAP_Visualizer.plot_time_series(df, date_col, value_cols, resample_freq=resample_freq)
            st.plotly_chart(fig, use_container_width=True)

def show_dashboard_view(df: pd.DataFrame, max_points: int):
    """Build a fixed set of charts and lay them out as a two-column dashboard.

    Needs at least two numeric columns and one object/category column;
    otherwise a warning is shown and nothing else is rendered. The scatter
    plot receives a quarter of ``max_points`` as its point budget.
    """

    st.header("📈 Interactive Dashboard")

    number_columns = df.select_dtypes(include=[np.number]).columns.tolist()
    label_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()

    # Guard clause: bail out early when the data cannot feed the dashboard.
    if len(number_columns) < 2 or len(label_columns) < 1:
        st.warning("Not enough numeric or categorical columns for dashboard generation")
        return

    with st.spinner("Generating dashboard charts..."):
        # Scatter, histogram and treemap are always available past the guard.
        figures = [
            UAP_Visualizer.plot_interactive_scatter(
                df, number_columns[0], number_columns[1],
                color_col=label_columns[0],
                max_points=max_points//4
            ),
            UAP_Visualizer.plot_interactive_histogram(df, number_columns[0]),
            UAP_Visualizer.plot_interactive_treemap(df, label_columns[0], top_n=15),
        ]

        # The correlation matrix is added only with three or more numeric
        # columns, and uses at most the first five of them.
        if len(number_columns) >= 3:
            figures.append(UAP_Visualizer.plot_correlation_matrix(df[number_columns[:5]]))

    # Left column shows figures 0 and 2, right column shows figures 1 and 3.
    left_panel, right_panel = st.columns(2)
    for panel, slots in ((left_panel, (0, 2)), (right_panel, (1, 3))):
        with panel:
            for slot in slots:
                if slot < len(figures):
                    st.plotly_chart(figures[slot], use_container_width=True)

    # Combined dashboard view
    if st.button("Generate Combined Dashboard"):
        combined = UAP_Visualizer.create_dashboard_layout(figures[:4], layout="2x2")
        st.plotly_chart(combined, use_container_width=True)

def show_performance_demo(df: pd.DataFrame):
    """Show dataset size metrics and time profiling, plotting and sampling.

    The headline metrics (row count, deep memory usage, and the number of
    entries under the ``cached_visualizations`` session-state key) are always
    shown. The timings run only after "Run Performance Test" is pressed; the
    sampling comparison additionally requires more than 10,000 rows.

    Args:
        df: Data whose processing time is measured.
    """

    st.header("⚡ Performance Demonstration")

    # Performance metrics
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Dataset Size", f"{len(df):,} rows")
    with col2:
        st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
    with col3:
        cache_info = st.session_state.get('cached_visualizations', {})
        st.metric("Cached Visualizations", len(cache_info))

    # Performance comparison
    st.subheader("🏃‍♂️ Speed Comparison")

    if not st.button("Run Performance Test"):
        return

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock and can jump.
    from time import perf_counter

    # Test data profiling speed (the result itself is not needed)
    started = perf_counter()
    DataProcessor.profile_data(df)
    profile_time = perf_counter() - started

    # Test visualization generation
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if numeric_cols:
        started = perf_counter()
        UAP_Visualizer.plot_interactive_histogram(df, numeric_cols[0])
        viz_time = perf_counter() - started
    else:
        viz_time = 0

    # Display results
    perf_col1, perf_col2 = st.columns(2)

    with perf_col1:
        st.metric("Data Profiling", f"{profile_time:.3f} seconds")
    with perf_col2:
        st.metric("Visualization Generation", f"{viz_time:.3f} seconds")

    # Show caching benefits
    st.info("🚀 Subsequent calls to the same functions will be much faster due to caching!")

    # Memory optimization demo
    if len(df) > 10000:
        st.subheader("📊 Smart Sampling Demo")

        # Every size is <= len(df) here because this branch requires more
        # than 10,000 rows, so no size needs to be skipped.
        sample_sizes = [1000, 5000, 10000, len(df)]

        # Build each label together with its timing so the two can never
        # drift out of step.
        measurements = []
        for size in sample_sizes:
            started = perf_counter()
            UAP_Visualizer._smart_sampling(df, max_points=size)
            measurements.append((f"{size:,}", perf_counter() - started))

        # Create performance chart
        perf_df = pd.DataFrame(measurements, columns=['Sample Size', 'Processing Time'])
        st.line_chart(perf_df.set_index('Sample Size'))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()