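# NOTE: the lines originally here appear to be file-viewer page residue, not module code:
# uploader "akryldigital", commit 8d898c4 (verified) "Gemini FSA (#6)", raw / history / blame links, size 8.74 kB.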
"""
UI components for displaying statistics and visualizations
"""
import streamlit as st
import pandas as pd
import plotly.express as px
from typing import Dict, Any
def display_chunk_statistics_charts(stats: Dict[str, Any], title: str = "Retrieval Statistics"):
    """Render retrieval statistics as a metric strip followed by three bar charts.

    Per the original description this view is meant for larger result sets
    (10+ results). Nothing is rendered when ``stats`` is empty/``None`` or
    reports zero chunks.

    Args:
        stats: Mapping that must provide ``total_chunks``, ``unique_sources``,
            ``unique_years``, ``unique_filenames``, ``source_distribution`` and
            ``year_distribution``; ``district_distribution`` is optional. The
            ``*_distribution`` entries are label -> count mappings.
        title: Heading text shown next to the chart icon.

    Returns:
        None. All output goes to the Streamlit page.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    if not stats or stats.get('total_chunks', 0) == 0:
        return

    # Escaped code point keeps this source ASCII-only; the literal emoji had
    # been turned into mojibake by a bad re-encoding and rendered as garbage.
    chart_icon = "\U0001F4CA"  # bar-chart emoji

    # Heading plus the four headline metrics, emitted as one HTML fragment.
    # The HTML lines stay unindented so Markdown never treats them as a code block.
    # NOTE(review): the outer <div> is only closed by the last st.markdown call
    # below; Streamlit appears to render each call as its own element, so the
    # wrapper may not actually enclose the charts - confirm in the browser.
    st.markdown(f"""
<div class="retrieval-distribution-container">
<h3 style="margin-top: 0;">{chart_icon} {title}</h3>
<div style="display: flex; justify-content: space-around; align-items: center; padding: 15px 0; border-bottom: 1px solid #e0e0e0; margin-bottom: 20px;">
<div class="metric-container">
<div class="metric-label">Total Chunks</div>
<div class="metric-value">{stats['total_chunks']}</div>
</div>
<div class="metric-container">
<div class="metric-label">Unique Sources</div>
<div class="metric-value">{stats['unique_sources']}</div>
</div>
<div class="metric-container">
<div class="metric-label">Unique Years</div>
<div class="metric-value">{stats['unique_years']}</div>
</div>
<div class="metric-container">
<div class="metric-label">Unique Files</div>
<div class="metric-value">{stats['unique_filenames']}</div>
</div>
</div>
""", unsafe_allow_html=True)

    # One column per distribution: sources, years, districts.
    source_col, year_col, district_col = st.columns(3)

    with source_col:
        if stats['source_distribution']:
            source_df = pd.DataFrame(
                list(stats['source_distribution'].items()),
                columns=['Source', 'Count'],
            )
            fig_source = px.bar(
                source_df,
                x='Count',
                y='Source',
                orientation='h',
                title='Distribution by Source',
                color='Count',
                color_continuous_scale='viridis',
            )
            fig_source.update_layout(height=400, showlegend=False)
            st.plotly_chart(fig_source, use_container_width=True)

    with year_col:
        if stats['year_distribution']:
            # 'Unknown' is a placeholder bucket, not a year; keep it off the chart.
            year_counts = {
                year: count
                for year, count in stats['year_distribution'].items()
                if year != 'Unknown'
            }
            if year_counts:
                year_df = pd.DataFrame(
                    list(year_counts.items()),
                    columns=['Year', 'Count'],
                )
                # Sort chronologically through a numeric helper column while the
                # displayed labels stay untouched. Labels that are not plain
                # numbers become NaN and sort last instead of raising
                # ValueError, which astype(int) did and which aborted the render.
                year_df['Year_Int'] = pd.to_numeric(year_df['Year'], errors='coerce')
                year_df = year_df.sort_values('Year_Int').drop('Year_Int', axis=1)
                fig_year = px.bar(
                    year_df,
                    x='Year',
                    y='Count',
                    title='Distribution by Year',
                    color='Count',
                    color_continuous_scale='plasma',
                )
                # Years are discrete labels; a continuous axis would add gaps
                # and fractional ticks.
                fig_year.update_xaxes(type='category')
                fig_year.update_layout(height=400, showlegend=False)
                st.plotly_chart(fig_year, use_container_width=True)
            else:
                st.info("No valid years found in the results")

    with district_col:
        if stats.get('district_distribution'):
            district_counts = {
                district: count
                for district, count in stats['district_distribution'].items()
                if district != 'Unknown'
            }
            if district_counts:
                district_df = pd.DataFrame(
                    list(district_counts.items()),
                    columns=['District', 'Count'],
                )
                district_df = district_df.sort_values('Count', ascending=False)
                fig_district = px.bar(
                    district_df,
                    x='Count',
                    y='District',
                    orientation='h',
                    title='Distribution by District',
                    color='Count',
                    color_continuous_scale='blues',
                )
                fig_district.update_layout(height=400, showlegend=False)
                st.plotly_chart(fig_district, use_container_width=True)
            else:
                st.info("No valid districts found in the results")

    # Closing tag for the wrapper opened in the first st.markdown call.
    st.markdown('</div>', unsafe_allow_html=True)
def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieval Distribution"):
    """Render retrieval statistics as four side-by-side tables.

    Per the original description this view is meant for smaller result sets.
    The columns show districts, sources, years and the five most frequent
    files. Nothing is rendered when ``stats`` is empty/``None`` or reports
    zero chunks.

    Args:
        stats: Mapping that must provide ``total_chunks``,
            ``source_distribution``, ``year_distribution`` and
            ``filename_distribution``; ``district_distribution`` is optional.
            The ``*_distribution`` entries are label -> count mappings.
        title: Heading text shown next to the chart icon.

    Returns:
        None. All output goes to the Streamlit page.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    if not stats or stats.get('total_chunks', 0) == 0:
        return

    # Escaped code points keep this source ASCII-only; the literal emoji had
    # been turned into mojibake by a bad re-encoding and rendered as garbage.
    icon_chart = "\U0001F4CA"            # bar chart
    icon_districts = "\U0001F3D8\uFE0F"  # houses
    icon_sources = "\U0001F4C2"          # open folder
    icon_years = "\U0001F4C5"            # calendar
    icon_files = "\U0001F4C4"            # page

    # NOTE(review): Streamlit appears to render each st.markdown call as its
    # own element, so this opening <div> may not actually wrap the tables
    # below - confirm in the browser.
    st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
    st.subheader(f"{icon_chart} {title}")

    with st.container():
        # Four equal-width columns keep the tables aligned.
        district_col, source_col, year_col, file_col = st.columns(4)

        with district_col:
            st.markdown(f"**{icon_districts} Districts**")
            # Missing, empty, or 'Unknown'-only data all end in the same message.
            district_counts = {
                district: count
                for district, count in (stats.get('district_distribution') or {}).items()
                if district != 'Unknown'
            }
            if district_counts:
                district_df = pd.DataFrame({
                    "District": list(district_counts.keys()),
                    "Count": list(district_counts.values()),
                }).sort_values('Count', ascending=False)
                st.dataframe(district_df, hide_index=True, width='stretch')
            else:
                st.write("No district data")

        with source_col:
            st.markdown(f"**{icon_sources} Sources**")
            if stats['source_distribution']:
                source_df = pd.DataFrame({
                    "Source": list(stats['source_distribution'].keys()),
                    "Count": list(stats['source_distribution'].values()),
                }).sort_values('Count', ascending=False)
                st.dataframe(source_df, hide_index=True, width='stretch')
            else:
                st.write("No source data")

        with year_col:
            st.markdown(f"**{icon_years} Years**")
            # 'Unknown' is a placeholder bucket, not a year.
            year_counts = {
                year: count
                for year, count in (stats['year_distribution'] or {}).items()
                if year != 'Unknown'
            }
            if year_counts:
                year_df = pd.DataFrame({
                    "Year": list(year_counts.keys()),
                    "Count": list(year_counts.values()),
                })
                # Sort chronologically through a numeric helper column while the
                # displayed labels stay untouched. Labels that are not plain
                # numbers become NaN and sort last instead of raising
                # ValueError, which astype(int) did and which aborted the render.
                year_df['Year_Int'] = pd.to_numeric(year_df['Year'], errors='coerce')
                year_df = year_df.sort_values('Year_Int')[['Year', 'Count']]
                st.dataframe(year_df, hide_index=True, width='stretch')
            else:
                st.write("No year data")

        with file_col:
            st.markdown(f"**{icon_files} Files**")
            if stats['filename_distribution']:
                # Five most frequent files; sorted() is stable, so ties keep
                # the mapping's original order.
                top_files = sorted(
                    stats['filename_distribution'].items(),
                    key=lambda item: item[1],
                    reverse=True,
                )[:5]
                file_df = pd.DataFrame({
                    # Names longer than 30 characters are cut and marked with "...".
                    "File": [
                        name[:30] + "..." if len(name) > 30 else name
                        for name, _ in top_files
                    ],
                    "Count": [count for _, count in top_files],
                })
                st.dataframe(file_df, hide_index=True, width='stretch')
            else:
                st.write("No file data")

    # Closing tag for the wrapper opened above.
    st.markdown('</div>', unsafe_allow_html=True)