""" UI components for displaying statistics and visualizations """ import streamlit as st import pandas as pd import plotly.express as px from typing import Dict, Any def display_chunk_statistics_charts(stats: Dict[str, Any], title: str = "Retrieval Statistics"): """Display statistics as interactive charts for 10+ results.""" if not stats or stats.get('total_chunks', 0) == 0: return # Wrap everything in one styled container - open it st.markdown(f"""

📊 {title}

Total Chunks
{stats['total_chunks']}
Unique Sources
{stats['unique_sources']}
Unique Years
{stats['unique_years']}
Unique Files
{stats['unique_filenames']}
""", unsafe_allow_html=True) # Charts - three columns to include Districts col1, col2, col3 = st.columns(3) with col1: # Source distribution chart if stats['source_distribution']: source_df = pd.DataFrame( list(stats['source_distribution'].items()), columns=['Source', 'Count'] ) fig_source = px.bar( source_df, x='Count', y='Source', orientation='h', title='Distribution by Source', color='Count', color_continuous_scale='viridis' ) fig_source.update_layout(height=400, showlegend=False) st.plotly_chart(fig_source, use_container_width=True) # Note: plotly_chart still uses use_container_width with col2: # Year distribution chart if stats['year_distribution']: # Filter out 'Unknown' years for the chart year_dist_filtered = {k: v for k, v in stats['year_distribution'].items() if k != 'Unknown'} if year_dist_filtered: year_df = pd.DataFrame( list(year_dist_filtered.items()), columns=['Year', 'Count'] ) # Sort by year as integer but keep as string for categorical display year_df['Year_Int'] = year_df['Year'].astype(int) year_df = year_df.sort_values('Year_Int').drop('Year_Int', axis=1) fig_year = px.bar( year_df, x='Year', y='Count', title='Distribution by Year', color='Count', color_continuous_scale='plasma' ) # Ensure years are treated as categorical (discrete) not continuous fig_year.update_xaxes(type='category') fig_year.update_layout(height=400, showlegend=False) st.plotly_chart(fig_year, use_container_width=True) # Note: plotly_chart still uses use_container_width else: st.info("No valid years found in the results") with col3: # District distribution chart if stats.get('district_distribution'): district_dist_filtered = {k: v for k, v in stats['district_distribution'].items() if k != 'Unknown'} if district_dist_filtered: district_df = pd.DataFrame( list(district_dist_filtered.items()), columns=['District', 'Count'] ) district_df = district_df.sort_values('Count', ascending=False) fig_district = px.bar( district_df, x='Count', y='District', orientation='h', title='Distribution by District', color='Count', color_continuous_scale='blues' ) fig_district.update_layout(height=400, showlegend=False) st.plotly_chart(fig_district, use_container_width=True) # Note: plotly_chart still uses use_container_width else: st.info("No valid districts found in the results") # Close the container st.markdown('
', unsafe_allow_html=True) def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieval Distribution"): """Display statistics as tables for smaller results with fixed alignment.""" if not stats or stats.get('total_chunks', 0) == 0: return # Wrap in styled container st.markdown('
', unsafe_allow_html=True) st.subheader(f"📊 {title}") # Create a container with fixed height for alignment stats_container = st.container() with stats_container: # Create 4 equal columns for consistent alignment col1, col2, col3, col4 = st.columns(4) with col1: st.markdown("**🏘️ Districts**") if stats.get('district_distribution'): district_dist_filtered = {k: v for k, v in stats['district_distribution'].items() if k != 'Unknown'} if district_dist_filtered: district_data = { "District": list(district_dist_filtered.keys()), "Count": list(district_dist_filtered.values()) } district_df = pd.DataFrame(district_data).sort_values('Count', ascending=False) st.dataframe(district_df, hide_index=True, width='stretch') else: st.write("No district data") else: st.write("No district data") with col2: st.markdown("**📂 Sources**") if stats['source_distribution']: source_data = { "Source": list(stats['source_distribution'].keys()), "Count": list(stats['source_distribution'].values()) } source_df = pd.DataFrame(source_data).sort_values('Count', ascending=False) st.dataframe(source_df, hide_index=True, width='stretch') else: st.write("No source data") with col3: st.markdown("**📅 Years**") if stats['year_distribution']: year_dist_filtered = {k: v for k, v in stats['year_distribution'].items() if k != 'Unknown'} if year_dist_filtered: year_data = { "Year": list(year_dist_filtered.keys()), "Count": list(year_dist_filtered.values()) } year_df = pd.DataFrame(year_data) # Sort by year as integer but display as string year_df['Year_Int'] = year_df['Year'].astype(int) year_df = year_df.sort_values('Year_Int')[['Year', 'Count']] st.dataframe(year_df, hide_index=True, width='stretch') else: st.write("No year data") else: st.write("No year data") with col4: st.markdown("**📄 Files**") if stats['filename_distribution']: filename_items = list(stats['filename_distribution'].items()) filename_items.sort(key=lambda x: x[1], reverse=True) # Show top files with truncated names file_data = { "File": [f[:30] + "..." if len(f) > 30 else f for f, c in filename_items[:5]], "Count": [c for f, c in filename_items[:5]] } file_df = pd.DataFrame(file_data) st.dataframe(file_df, hide_index=True, width='stretch') else: st.write("No file data") # Close container st.markdown('
', unsafe_allow_html=True)