Spaces:
Sleeping
Sleeping
| """ | |
| UI components for displaying statistics and visualizations | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| from typing import Dict, Any | |
def _count_bar_figure(frame, *, x, y, title, color_scale, orientation=None):
    """Build a 400px-high Plotly bar chart coloured by the ``Count`` column."""
    fig = px.bar(
        frame,
        x=x,
        y=y,
        orientation=orientation,
        title=title,
        color='Count',
        color_continuous_scale=color_scale,
    )
    fig.update_layout(height=400, showlegend=False)
    return fig


def display_chunk_statistics_charts(stats: Dict[str, Any], title: str = "Retrieval Statistics"):
    """Display statistics as interactive charts for 10+ results.

    Renders a header strip with four summary metrics followed by three bar
    charts (sources, years, districts) laid out in three columns. Nothing is
    rendered when ``stats`` is empty or reports zero chunks.

    Args:
        stats: Mapping read with these keys: ``total_chunks``,
            ``unique_sources``, ``unique_years``, ``unique_filenames`` (header
            metrics) and ``source_distribution``, ``year_distribution``,
            ``district_distribution`` (label -> count mappings, one chart
            each). Missing metric keys are shown as 0; missing or empty
            distributions simply skip their chart.
        title: Heading text placed at the top of the block.

    Returns:
        None. All output goes to the Streamlit page.
    """
    if not stats or stats.get('total_chunks', 0) == 0:
        return

    # Header metrics, in display order. ``.get`` keeps a partially filled
    # stats dict from raising KeyError half-way through rendering.
    metrics = (
        ("Total Chunks", stats.get('total_chunks', 0)),
        ("Unique Sources", stats.get('unique_sources', 0)),
        ("Unique Years", stats.get('unique_years', 0)),
        ("Unique Files", stats.get('unique_filenames', 0)),
    )
    metric_cells = "".join(
        '<div class="metric-container">\n'
        f'<div class="metric-label">{label}</div>\n'
        f'<div class="metric-value">{value}</div>\n'
        '</div>\n'
        for label, value in metrics
    )

    # Open the styled container and emit the metric strip.
    # NOTE(review): the outer <div> is opened here and closed by a separate
    # st.markdown call at the end; Streamlit appears to render each call as
    # its own element, so the charts are probably not wrapped - confirm.
    # NOTE(review): the leading "π" looks like a mis-decoded emoji; restore
    # the intended glyph from the upstream file if available.
    st.markdown(
        '\n<div class="retrieval-distribution-container">\n'
        f'<h3 style="margin-top: 0;">π {title}</h3>\n'
        '<div style="display: flex; justify-content: space-around; align-items: center; padding: 15px 0; border-bottom: 1px solid #e0e0e0; margin-bottom: 20px;">\n'
        f'{metric_cells}'
        '</div>\n',
        unsafe_allow_html=True,
    )

    # Charts - three columns to include Districts
    col1, col2, col3 = st.columns(3)

    with col1:
        # Source distribution chart (kept in the mapping's own order).
        source_counts = stats.get('source_distribution')
        if source_counts:
            source_df = pd.DataFrame(
                list(source_counts.items()),
                columns=['Source', 'Count'],
            )
            fig_source = _count_bar_figure(
                source_df,
                x='Count',
                y='Source',
                orientation='h',
                title='Distribution by Source',
                color_scale='viridis',
            )
            # Note: plotly_chart still uses use_container_width
            st.plotly_chart(fig_source, use_container_width=True)

    with col2:
        # Year distribution chart
        year_counts = stats.get('year_distribution')
        if year_counts:
            # 'Unknown' is a placeholder bucket, not a year - leave it out.
            known_years = {k: v for k, v in year_counts.items() if k != 'Unknown'}
            if known_years:
                year_df = pd.DataFrame(
                    list(known_years.items()),
                    columns=['Year', 'Count'],
                )
                # Order numerically while keeping the original labels.
                # Labels that are not numbers sort last instead of raising,
                # which a plain astype(int) would do.
                year_df = year_df.sort_values(
                    'Year',
                    key=lambda col: pd.to_numeric(col, errors='coerce'),
                    na_position='last',
                )
                fig_year = _count_bar_figure(
                    year_df,
                    x='Year',
                    y='Count',
                    title='Distribution by Year',
                    color_scale='plasma',
                )
                # Ensure years are treated as categorical (discrete) not continuous
                fig_year.update_xaxes(type='category')
                # Note: plotly_chart still uses use_container_width
                st.plotly_chart(fig_year, use_container_width=True)
            else:
                st.info("No valid years found in the results")

    with col3:
        # District distribution chart, largest count first.
        district_counts = stats.get('district_distribution')
        if district_counts:
            known_districts = {k: v for k, v in district_counts.items() if k != 'Unknown'}
            if known_districts:
                district_df = pd.DataFrame(
                    list(known_districts.items()),
                    columns=['District', 'Count'],
                ).sort_values('Count', ascending=False)
                fig_district = _count_bar_figure(
                    district_df,
                    x='Count',
                    y='District',
                    orientation='h',
                    title='Distribution by District',
                    color_scale='blues',
                )
                # Note: plotly_chart still uses use_container_width
                st.plotly_chart(fig_district, use_container_width=True)
            else:
                st.info("No valid districts found in the results")

    # Close the container
    st.markdown('</div>', unsafe_allow_html=True)
def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieval Distribution"):
    """Display statistics as tables for smaller results with fixed alignment.

    Renders four side-by-side tables (districts, sources, years, files).
    Nothing is rendered when ``stats`` is empty or reports zero chunks.

    Args:
        stats: Mapping read with these keys: ``total_chunks`` and the
            label -> count mappings ``district_distribution``,
            ``source_distribution``, ``year_distribution`` and
            ``filename_distribution``. A missing or empty mapping shows a
            "No ... data" message in its column.
        title: Text shown in the subheader above the tables.

    Returns:
        None. All output goes to the Streamlit page.
    """
    if not stats or stats.get('total_chunks', 0) == 0:
        return

    max_files = 5        # only the most frequent files are listed
    max_name_chars = 30  # longer file names are cut and suffixed with "..."

    # Wrap in styled container
    # NOTE(review): the <div> is opened here and closed by a separate
    # st.markdown call at the end; Streamlit appears to render each call as
    # its own element, so the tables are probably not wrapped - confirm.
    st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
    # NOTE(review): the "π"/"ποΈ" prefixes in the labels below look like
    # mis-decoded emoji; restore the intended glyphs from the upstream file.
    st.subheader(f"π {title}")

    # Create a container with fixed height for alignment
    stats_container = st.container()
    with stats_container:
        # Create 4 equal columns for consistent alignment
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.markdown("**ποΈ Districts**")
            district_counts = stats.get('district_distribution') or {}
            # 'Unknown' is a placeholder bucket and is not listed.
            known_districts = {k: v for k, v in district_counts.items() if k != 'Unknown'}
            if known_districts:
                district_df = pd.DataFrame({
                    "District": list(known_districts.keys()),
                    "Count": list(known_districts.values()),
                }).sort_values('Count', ascending=False)
                st.dataframe(district_df, hide_index=True, width='stretch')
            else:
                st.write("No district data")

        with col2:
            st.markdown("**π Sources**")
            source_counts = stats.get('source_distribution')
            if source_counts:
                source_df = pd.DataFrame({
                    "Source": list(source_counts.keys()),
                    "Count": list(source_counts.values()),
                }).sort_values('Count', ascending=False)
                st.dataframe(source_df, hide_index=True, width='stretch')
            else:
                st.write("No source data")

        with col3:
            st.markdown("**π Years**")
            year_counts = stats.get('year_distribution') or {}
            known_years = {k: v for k, v in year_counts.items() if k != 'Unknown'}
            if known_years:
                year_df = pd.DataFrame({
                    "Year": list(known_years.keys()),
                    "Count": list(known_years.values()),
                })
                # Order numerically while displaying the original labels.
                # Labels that are not numbers sort last instead of raising,
                # which a plain astype(int) would do.
                year_df = year_df.sort_values(
                    'Year',
                    key=lambda col: pd.to_numeric(col, errors='coerce'),
                    na_position='last',
                )
                st.dataframe(year_df, hide_index=True, width='stretch')
            else:
                st.write("No year data")

        with col4:
            st.markdown("**π Files**")
            filename_counts = stats.get('filename_distribution')
            if filename_counts:
                top_files = sorted(
                    filename_counts.items(),
                    key=lambda item: item[1],
                    reverse=True,
                )[:max_files]
                # Show top files with truncated names
                shown_names = []
                for name, _count in top_files:
                    name = str(name)
                    if len(name) > max_name_chars:
                        name = name[:max_name_chars] + "..."
                    shown_names.append(name)
                file_df = pd.DataFrame({
                    "File": shown_names,
                    "Count": [count for _name, count in top_files],
                })
                st.dataframe(file_df, hide_index=True, width='stretch')
            else:
                st.write("No file data")

    # Close container
    st.markdown('</div>', unsafe_allow_html=True)