"""
UI components for displaying statistics and visualizations
"""

import html
from typing import Dict, Any

import pandas as pd
import plotly.express as px
import streamlit as st


def display_chunk_statistics_charts(stats: Dict[str, Any], title: str = "Retrieval Statistics"):
    """Render retrieval statistics as a metrics header plus three bar charts.

    Intended for larger result sets (the original note says 10+ results);
    this function does not check the result count itself.

    Args:
        stats: Summary values. Keys read here are ``total_chunks``,
            ``unique_sources``, ``unique_years`` and ``unique_filenames``
            (headline metrics), plus ``source_distribution``,
            ``year_distribution`` and ``district_distribution`` (label ->
            count mappings, one bar chart each). Missing metric keys are
            shown as 0; missing or empty distributions are skipped.
        title: Heading text shown above the metrics.

    Returns:
        None. Nothing is rendered when ``stats`` is empty or its
        ``total_chunks`` value is missing, ``None`` or 0.
    """
    # A falsy count (missing, None, 0) means there is nothing to show.
    if not stats or not stats.get('total_chunks'):
        return

    # Everything interpolated into the raw HTML below is escaped so that a
    # title or value containing "<", ">" or "&" is displayed literally instead
    # of being interpreted as markup.
    safe_title = html.escape(str(title))
    total_chunks = html.escape(str(stats['total_chunks']))
    unique_sources = html.escape(str(stats.get('unique_sources', 0)))
    unique_years = html.escape(str(stats.get('unique_years', 0)))
    unique_files = html.escape(str(stats.get('unique_filenames', 0)))

    # Open the styled container and emit the heading and metric row.
    # NOTE(review): each st.markdown call appears to be rendered as its own
    # element, so this unclosed <div> may not actually wrap the charts
    # below - confirm in the browser before relying on the container styling.
    st.markdown(f"""
    <div class="retrieval-distribution-container">
        <h3 style="margin-top: 0;">📊 {safe_title}</h3>
        <div style="display: flex; justify-content: space-around; align-items: center; padding: 15px 0; border-bottom: 1px solid #e0e0e0; margin-bottom: 20px;">
            <div class="metric-container">
                <div class="metric-label">Total Chunks</div>
                <div class="metric-value">{total_chunks}</div>
            </div>
            <div class="metric-container">
                <div class="metric-label">Unique Sources</div>
                <div class="metric-value">{unique_sources}</div>
            </div>
            <div class="metric-container">
                <div class="metric-label">Unique Years</div>
                <div class="metric-value">{unique_years}</div>
            </div>
            <div class="metric-container">
                <div class="metric-label">Unique Files</div>
                <div class="metric-value">{unique_files}</div>
            </div>
        </div>
    """, unsafe_allow_html=True)

    # Charts - three columns: sources, years, districts
    col1, col2, col3 = st.columns(3)

    with col1:
        # Source distribution chart (skipped when there is no source data)
        source_distribution = stats.get('source_distribution')
        if source_distribution:
            source_df = pd.DataFrame(
                list(source_distribution.items()),
                columns=['Source', 'Count']
            )
            fig_source = px.bar(
                source_df,
                x='Count',
                y='Source',
                orientation='h',
                title='Distribution by Source',
                color='Count',
                color_continuous_scale='viridis'
            )
            fig_source.update_layout(height=400, showlegend=False)
            st.plotly_chart(fig_source, use_container_width=True)  # Note: plotly_chart still uses use_container_width

    with col2:
        # Year distribution chart
        year_distribution = stats.get('year_distribution')
        if year_distribution:
            # Filter out 'Unknown' years for the chart
            known_years = {k: v for k, v in year_distribution.items() if k != 'Unknown'}
            if known_years:
                year_df = pd.DataFrame(
                    list(known_years.items()),
                    columns=['Year', 'Count']
                )
                # Sort numerically while keeping the original labels for
                # display. Labels that are not numbers are coerced to NaN and
                # therefore sort last instead of raising.
                year_df = year_df.sort_values(
                    'Year',
                    key=lambda col: pd.to_numeric(col, errors='coerce')
                )

                fig_year = px.bar(
                    year_df,
                    x='Year',
                    y='Count',
                    title='Distribution by Year',
                    color='Count',
                    color_continuous_scale='plasma'
                )
                # Ensure years are treated as categorical (discrete) not continuous
                fig_year.update_xaxes(type='category')
                fig_year.update_layout(height=400, showlegend=False)
                st.plotly_chart(fig_year, use_container_width=True)  # Note: plotly_chart still uses use_container_width
            else:
                st.info("No valid years found in the results")

    with col3:
        # District distribution chart
        district_distribution = stats.get('district_distribution')
        if district_distribution:
            known_districts = {k: v for k, v in district_distribution.items() if k != 'Unknown'}
            if known_districts:
                district_df = pd.DataFrame(
                    list(known_districts.items()),
                    columns=['District', 'Count']
                )
                district_df = district_df.sort_values('Count', ascending=False)

                fig_district = px.bar(
                    district_df,
                    x='Count',
                    y='District',
                    orientation='h',
                    title='Distribution by District',
                    color='Count',
                    color_continuous_scale='blues'
                )
                fig_district.update_layout(height=400, showlegend=False)
                st.plotly_chart(fig_district, use_container_width=True)  # Note: plotly_chart still uses use_container_width
            else:
                st.info("No valid districts found in the results")

    # Close the container (see the review note on the opening markup above)
    st.markdown('</div>', unsafe_allow_html=True)


def display_chunk_statistics_table(
    stats: Dict[str, Any],
    title: str = "Retrieval Distribution",
    *,
    max_files: int = 5,
    max_filename_length: int = 30,
):
    """Render retrieval statistics as four side-by-side tables.

    The columns show districts, sources, years and files, in that order.
    Intended for smaller result sets; this function does not check the result
    count itself.

    Args:
        stats: Summary values. Keys read here are ``total_chunks`` (guard
            only) and the label -> count mappings ``district_distribution``,
            ``source_distribution``, ``year_distribution`` and
            ``filename_distribution``. A missing or empty mapping shows a
            "No ... data" placeholder in its column.
        title: Text shown in the subheader above the tables.
        max_files: Number of files, by descending count, listed in the Files
            table.
        max_filename_length: File names longer than this many characters are
            cut to that length and suffixed with ``"..."``.

    Returns:
        None. Nothing is rendered when ``stats`` is empty or its
        ``total_chunks`` value is missing, ``None`` or 0.
    """
    # A falsy count (missing, None, 0) means there is nothing to show.
    if not stats or not stats.get('total_chunks'):
        return

    # Open the styled container.
    # NOTE(review): each st.markdown call appears to be rendered as its own
    # element, so this unclosed <div> may not actually wrap the tables
    # below - confirm in the browser before relying on the container styling.
    st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)

    st.subheader(f"📊 {title}")

    # One container holding four equal columns keeps the tables aligned.
    with st.container():
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.markdown("**🏘️ Districts**")
            district_distribution = stats.get('district_distribution') or {}
            known_districts = {k: v for k, v in district_distribution.items() if k != 'Unknown'}
            if known_districts:
                district_df = pd.DataFrame({
                    "District": list(known_districts.keys()),
                    "Count": list(known_districts.values()),
                }).sort_values('Count', ascending=False)
                st.dataframe(district_df, hide_index=True, width='stretch')
            else:
                st.write("No district data")

        with col2:
            st.markdown("**📂 Sources**")
            source_distribution = stats.get('source_distribution')
            if source_distribution:
                source_df = pd.DataFrame({
                    "Source": list(source_distribution.keys()),
                    "Count": list(source_distribution.values()),
                }).sort_values('Count', ascending=False)
                st.dataframe(source_df, hide_index=True, width='stretch')
            else:
                st.write("No source data")

        with col3:
            st.markdown("**📅 Years**")
            year_distribution = stats.get('year_distribution') or {}
            known_years = {k: v for k, v in year_distribution.items() if k != 'Unknown'}
            if known_years:
                year_df = pd.DataFrame({
                    "Year": list(known_years.keys()),
                    "Count": list(known_years.values()),
                })
                # Sort numerically while keeping the original labels for
                # display. Labels that are not numbers are coerced to NaN and
                # therefore sort last instead of raising.
                year_df = year_df.sort_values(
                    'Year',
                    key=lambda col: pd.to_numeric(col, errors='coerce')
                )
                st.dataframe(year_df, hide_index=True, width='stretch')
            else:
                st.write("No year data")

        with col4:
            st.markdown("**📄 Files**")
            filename_distribution = stats.get('filename_distribution')
            if filename_distribution:
                # Highest counts first; sorted() is stable, so ties keep the
                # mapping's own order.
                top_files = sorted(
                    filename_distribution.items(),
                    key=lambda item: item[1],
                    reverse=True
                )[:max_files]

                # Show top files with truncated names; str() guards against
                # non-string keys, which have no usable length.
                labels = []
                counts = []
                for name, count in top_files:
                    label = str(name)
                    if len(label) > max_filename_length:
                        label = label[:max_filename_length] + "..."
                    labels.append(label)
                    counts.append(count)

                file_df = pd.DataFrame({"File": labels, "Count": counts})
                st.dataframe(file_df, hide_index=True, width='stretch')
            else:
                st.write("No file data")

    # Close container (see the review note on the opening markup above)
    st.markdown('</div>', unsafe_allow_html=True)