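# NOTE: the next three lines are residue from the hosting site's file viewer
# (avatar alt-text, commit title, commit hash), kept here as comments so the
# module remains valid Python:
#   akryldigital's picture
#   Gemini FSA (#6)
#   8d898c4 verified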
"""
UI components for displaying statistics and visualizations
"""
import streamlit as st
import pandas as pd
import plotly.express as px
from typing import Dict, Any
def display_chunk_statistics_charts(stats: Dict[str, Any], title: str = "Retrieval Statistics"):
    """Render retrieval statistics as a metrics header plus three bar charts.

    The header shows four counters (total chunks, unique sources, unique
    years, unique files). Below it, three side-by-side columns show the
    distribution by source, by year and by district as Plotly bar charts.
    The original docstring describes this view as the one used for result
    sets of 10+ chunks; that threshold is not enforced here.

    Args:
        stats: Precomputed statistics. Keys read by this function:
            ``total_chunks``, ``unique_sources``, ``unique_years``,
            ``unique_filenames`` (header counters) and
            ``source_distribution``, ``year_distribution``,
            ``district_distribution`` (label -> count mappings). Missing
            counters are shown as 0 and missing distributions are treated
            as empty. The label ``'Unknown'`` is excluded from the year and
            district charts.
        title: Heading text. It is interpolated into raw HTML without
            escaping, so callers must pass trusted text.

    Returns:
        None. Nothing is rendered when ``stats`` is empty or its
        ``total_chunks`` is 0.
    """
    if not stats or stats.get('total_chunks', 0) == 0:
        return

    total_chunks = stats['total_chunks']
    # Use .get() for every optional key so a partially filled stats dict
    # degrades to zeros / empty charts instead of raising KeyError.
    unique_sources = stats.get('unique_sources', 0)
    unique_years = stats.get('unique_years', 0)
    unique_files = stats.get('unique_filenames', 0)

    # Header block. The markup is kept flush-left so the emitted string has no
    # leading indentation.
    # NOTE(review): the outer <div> is opened here and closed by the last
    # st.markdown call below; whether Streamlit lets one element wrap content
    # rendered by later calls should be confirmed against the app's CSS.
    st.markdown(f"""
<div class="retrieval-distribution-container">
<h3 style="margin-top: 0;">📊 {title}</h3>
<div style="display: flex; justify-content: space-around; align-items: center; padding: 15px 0; border-bottom: 1px solid #e0e0e0; margin-bottom: 20px;">
<div class="metric-container">
<div class="metric-label">Total Chunks</div>
<div class="metric-value">{total_chunks}</div>
</div>
<div class="metric-container">
<div class="metric-label">Unique Sources</div>
<div class="metric-value">{unique_sources}</div>
</div>
<div class="metric-container">
<div class="metric-label">Unique Years</div>
<div class="metric-value">{unique_years}</div>
</div>
<div class="metric-container">
<div class="metric-label">Unique Files</div>
<div class="metric-value">{unique_files}</div>
</div>
</div>
""", unsafe_allow_html=True)

    # Charts - three columns: sources, years, districts
    col1, col2, col3 = st.columns(3)

    with col1:
        # Source distribution chart (horizontal bars)
        source_distribution = stats.get('source_distribution')
        if source_distribution:
            source_df = pd.DataFrame(
                list(source_distribution.items()),
                columns=['Source', 'Count']
            )
            fig_source = px.bar(
                source_df,
                x='Count',
                y='Source',
                orientation='h',
                title='Distribution by Source',
                color='Count',
                color_continuous_scale='viridis'
            )
            fig_source.update_layout(height=400, showlegend=False)
            # plotly_chart is still called with use_container_width here
            st.plotly_chart(fig_source, use_container_width=True)

    with col2:
        # Year distribution chart (vertical bars, chronological order)
        year_distribution = stats.get('year_distribution')
        if year_distribution:
            # Filter out 'Unknown' years for the chart
            year_dist_filtered = {k: v for k, v in year_distribution.items() if k != 'Unknown'}
            if year_dist_filtered:
                year_df = pd.DataFrame(
                    list(year_dist_filtered.items()),
                    columns=['Year', 'Count']
                )
                # Sort numerically while leaving the displayed labels untouched.
                # Labels that are not numeric (e.g. "2020/21") become NaN and
                # are placed last instead of raising, as astype(int) would.
                year_df = (
                    year_df
                    .assign(_year_num=pd.to_numeric(year_df['Year'], errors='coerce'))
                    .sort_values('_year_num', na_position='last', kind='stable')
                    .drop(columns='_year_num')
                )
                fig_year = px.bar(
                    year_df,
                    x='Year',
                    y='Count',
                    title='Distribution by Year',
                    color='Count',
                    color_continuous_scale='plasma'
                )
                # Ensure years are treated as categorical (discrete), not continuous
                fig_year.update_xaxes(type='category')
                fig_year.update_layout(height=400, showlegend=False)
                st.plotly_chart(fig_year, use_container_width=True)
            else:
                st.info("No valid years found in the results")

    with col3:
        # District distribution chart (horizontal bars, sorted by count)
        district_distribution = stats.get('district_distribution')
        if district_distribution:
            district_dist_filtered = {k: v for k, v in district_distribution.items() if k != 'Unknown'}
            if district_dist_filtered:
                district_df = pd.DataFrame(
                    list(district_dist_filtered.items()),
                    columns=['District', 'Count']
                )
                district_df = district_df.sort_values('Count', ascending=False)
                fig_district = px.bar(
                    district_df,
                    x='Count',
                    y='District',
                    orientation='h',
                    title='Distribution by District',
                    color='Count',
                    color_continuous_scale='blues'
                )
                fig_district.update_layout(height=400, showlegend=False)
                st.plotly_chart(fig_district, use_container_width=True)
            else:
                st.info("No valid districts found in the results")

    # Close the container opened in the header markup
    st.markdown('</div>', unsafe_allow_html=True)
def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieval Distribution"):
    """Render retrieval statistics as four side-by-side tables.

    The four equal-width columns show, in order: districts, sources, years
    and files. Districts and sources are sorted by count (descending), years
    are sorted chronologically, and files show only the most frequent
    entries with long names truncated.

    Args:
        stats: Precomputed statistics. Keys read by this function:
            ``total_chunks`` and the label -> count mappings
            ``district_distribution``, ``source_distribution``,
            ``year_distribution`` and ``filename_distribution``. A missing
            or empty mapping renders a "No ... data" placeholder. The label
            ``'Unknown'`` is excluded from the district and year tables.
        title: Text shown in the sub-header above the tables.

    Returns:
        None. Nothing is rendered when ``stats`` is empty or its
        ``total_chunks`` is 0.
    """
    if not stats or stats.get('total_chunks', 0) == 0:
        return

    max_files_shown = 5     # only the top-N files by count are listed
    max_filename_chars = 30  # longer file names are cut and suffixed with "..."

    # Wrap in styled container.
    # NOTE(review): the <div> is opened here and closed by the final
    # st.markdown call; whether Streamlit lets one element wrap content
    # rendered by later calls should be confirmed against the app's CSS.
    st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
    st.subheader(f"📊 {title}")

    stats_container = st.container()
    with stats_container:
        # Four equal columns for consistent alignment
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.markdown("**🏘️ Districts**")
            district_distribution = stats.get('district_distribution')
            district_dist_filtered = {
                k: v for k, v in (district_distribution or {}).items() if k != 'Unknown'
            }
            if district_dist_filtered:
                district_df = pd.DataFrame({
                    "District": list(district_dist_filtered.keys()),
                    "Count": list(district_dist_filtered.values())
                }).sort_values('Count', ascending=False)
                st.dataframe(district_df, hide_index=True, width='stretch')
            else:
                st.write("No district data")

        with col2:
            st.markdown("**📂 Sources**")
            # .get() keeps a missing key on the "no data" path instead of KeyError
            source_distribution = stats.get('source_distribution')
            if source_distribution:
                source_df = pd.DataFrame({
                    "Source": list(source_distribution.keys()),
                    "Count": list(source_distribution.values())
                }).sort_values('Count', ascending=False)
                st.dataframe(source_df, hide_index=True, width='stretch')
            else:
                st.write("No source data")

        with col3:
            st.markdown("**📅 Years**")
            year_distribution = stats.get('year_distribution')
            year_dist_filtered = {
                k: v for k, v in (year_distribution or {}).items() if k != 'Unknown'
            }
            if year_dist_filtered:
                year_df = pd.DataFrame({
                    "Year": list(year_dist_filtered.keys()),
                    "Count": list(year_dist_filtered.values())
                })
                # Sort numerically but display the original labels. Labels
                # that are not numeric become NaN and are listed last instead
                # of raising, as astype(int) would.
                year_df = (
                    year_df
                    .assign(_year_num=pd.to_numeric(year_df['Year'], errors='coerce'))
                    .sort_values('_year_num', na_position='last', kind='stable')
                )[['Year', 'Count']]
                st.dataframe(year_df, hide_index=True, width='stretch')
            else:
                st.write("No year data")

        with col4:
            st.markdown("**📄 Files**")
            filename_distribution = stats.get('filename_distribution')
            if filename_distribution:
                # Most frequent files first; sorted() is stable, so ties keep
                # the mapping's original order.
                top_files = sorted(
                    filename_distribution.items(),
                    key=lambda item: item[1],
                    reverse=True
                )[:max_files_shown]
                file_df = pd.DataFrame({
                    "File": [
                        name[:max_filename_chars] + "..." if len(name) > max_filename_chars else name
                        for name, _ in top_files
                    ],
                    "Count": [count for _, count in top_files]
                })
                st.dataframe(file_df, hide_index=True, width='stretch')
            else:
                st.write("No file data")

    # Close container
    st.markdown('</div>', unsafe_allow_html=True)