Spaces:

akryldigital
/

audit_assistant

Sleeping

App Files Files Community

audit_assistant / src /ui_components /components.py

akryldigital

Gemini FSA (#6)

8d898c4 verified 27 days ago

raw

history blame contribute delete

8.74 kB

	"""
	UI components for displaying statistics and visualizations
	"""

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	from typing import Dict, Any


	def display_chunk_statistics_charts(stats: Dict[str, Any], title: str = "Retrieval Statistics"):
	    """Render retrieval statistics as a metrics header plus three bar charts.

	    Per the original summary this is the view meant for larger result sets
	    ("10+ results"); the threshold itself is decided by the caller.

	    Args:
	        stats: Mapping read for the keys ``total_chunks``, ``unique_sources``,
	            ``unique_years``, ``unique_filenames``, ``source_distribution``,
	            ``year_distribution`` and ``district_distribution``. The
	            ``*_distribution`` values are consumed via ``.items()`` as
	            ``label -> count`` pairs. Missing ``unique_*`` keys are shown as 0
	            and a missing distribution is treated like an empty one.
	        title: Heading shown above the metrics. It is interpolated into raw
	            HTML without escaping.

	    Returns:
	        None. Nothing is rendered when ``stats`` is empty or reports zero
	        chunks.
	    """
	    if not stats or stats.get('total_chunks', 0) == 0:
	        return

	    # The guard above guarantees 'total_chunks' exists; the other counters
	    # are read defensively so one missing key does not abort the whole view.
	    total_chunks = stats['total_chunks']
	    unique_sources = stats.get('unique_sources', 0)
	    unique_years = stats.get('unique_years', 0)
	    unique_filenames = stats.get('unique_filenames', 0)

	    # NOTE(review): this markup leaves the outer container <div> open and a
	    # separate st.markdown call at the end of the function emits the closing
	    # tag. Whether the charts rendered in between actually end up inside that
	    # div depends on how Streamlit renders separate markdown calls - confirm
	    # in the browser.
	    st.markdown(f"""
	<div class="retrieval-distribution-container">
	<h3 style="margin-top: 0;">📊 {title}</h3>
	<div style="display: flex; justify-content: space-around; align-items: center; padding: 15px 0; border-bottom: 1px solid #e0e0e0; margin-bottom: 20px;">
	<div class="metric-container">
	<div class="metric-label">Total Chunks</div>
	<div class="metric-value">{total_chunks}</div>
	</div>
	<div class="metric-container">
	<div class="metric-label">Unique Sources</div>
	<div class="metric-value">{unique_sources}</div>
	</div>
	<div class="metric-container">
	<div class="metric-label">Unique Years</div>
	<div class="metric-value">{unique_years}</div>
	</div>
	<div class="metric-container">
	<div class="metric-label">Unique Files</div>
	<div class="metric-value">{unique_filenames}</div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	    # One column per chart: sources, years, districts.
	    col_source, col_year, col_district = st.columns(3)

	    with col_source:
	        source_dist = stats.get('source_distribution')
	        if source_dist:
	            source_df = pd.DataFrame(
	                list(source_dist.items()),
	                columns=['Source', 'Count'],
	            )
	            fig_source = px.bar(
	                source_df,
	                x='Count',
	                y='Source',
	                orientation='h',
	                title='Distribution by Source',
	                color='Count',
	                color_continuous_scale='viridis',
	            )
	            fig_source.update_layout(height=400, showlegend=False)
	            # Note: plotly_chart still uses use_container_width
	            st.plotly_chart(fig_source, use_container_width=True)

	    with col_year:
	        year_dist = stats.get('year_distribution')
	        if year_dist:
	            # 'Unknown' is a placeholder bucket, not a year; keep it off the axis.
	            known_years = {k: v for k, v in year_dist.items() if k != 'Unknown'}
	            if known_years:
	                year_df = pd.DataFrame(
	                    list(known_years.items()),
	                    columns=['Year', 'Count'],
	                )
	                # Order chronologically by numeric value while leaving the
	                # displayed labels untouched. Labels that are not numeric
	                # become NaN and sort last instead of raising, which a plain
	                # astype(int) would do.
	                year_key = pd.to_numeric(year_df['Year'], errors='coerce')
	                year_df = year_df.loc[year_key.sort_values(kind='mergesort').index]

	                fig_year = px.bar(
	                    year_df,
	                    x='Year',
	                    y='Count',
	                    title='Distribution by Year',
	                    color='Count',
	                    color_continuous_scale='plasma',
	                )
	                # Treat years as discrete categories, not a continuous axis.
	                fig_year.update_xaxes(type='category')
	                fig_year.update_layout(height=400, showlegend=False)
	                st.plotly_chart(fig_year, use_container_width=True)
	            else:
	                st.info("No valid years found in the results")

	    with col_district:
	        district_dist = stats.get('district_distribution')
	        if district_dist:
	            known_districts = {k: v for k, v in district_dist.items() if k != 'Unknown'}
	            if known_districts:
	                district_df = pd.DataFrame(
	                    list(known_districts.items()),
	                    columns=['District', 'Count'],
	                ).sort_values('Count', ascending=False)

	                fig_district = px.bar(
	                    district_df,
	                    x='Count',
	                    y='District',
	                    orientation='h',
	                    title='Distribution by District',
	                    color='Count',
	                    color_continuous_scale='blues',
	                )
	                fig_district.update_layout(height=400, showlegend=False)
	                st.plotly_chart(fig_district, use_container_width=True)
	            else:
	                st.info("No valid districts found in the results")

	    # Emit the closing tag for the container opened in the header markup.
	    st.markdown('</div>', unsafe_allow_html=True)


	def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieval Distribution"):
	    """Render retrieval statistics as four side-by-side tables.

	    Per the original summary this is the view meant for smaller result sets.
	    The columns show districts, sources, years and the most frequent files.

	    Args:
	        stats: Mapping read for the keys ``total_chunks``,
	            ``district_distribution``, ``source_distribution``,
	            ``year_distribution`` and ``filename_distribution``. Each
	            distribution is consumed as ``label -> count`` pairs; a missing
	            or empty one shows a "No ... data" placeholder instead.
	        title: Text shown in the subheader above the tables.

	    Returns:
	        None. Nothing is rendered when ``stats`` is empty or reports zero
	        chunks.
	    """
	    if not stats or stats.get('total_chunks', 0) == 0:
	        return

	    max_files = 5      # only the most frequent files are listed
	    max_name_len = 30  # longer file names are cut and suffixed with "..."

	    # Opening tag of the styled container; the matching close is emitted at
	    # the end of the function in a separate markdown call.
	    st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)

	    st.subheader(f"📊 {title}")

	    with st.container():
	        # Four equal-width columns so the tables line up.
	        col_district, col_source, col_year, col_file = st.columns(4)

	        with col_district:
	            st.markdown("🏘️ Districts")
	            district_dist = stats.get('district_distribution') or {}
	            # 'Unknown' is a placeholder bucket, not a real district.
	            known_districts = {k: v for k, v in district_dist.items() if k != 'Unknown'}
	            if known_districts:
	                district_df = pd.DataFrame({
	                    "District": list(known_districts.keys()),
	                    "Count": list(known_districts.values()),
	                }).sort_values('Count', ascending=False)
	                st.dataframe(district_df, hide_index=True, width='stretch')
	            else:
	                st.write("No district data")

	        with col_source:
	            st.markdown("📂 Sources")
	            source_dist = stats.get('source_distribution')
	            if source_dist:
	                source_df = pd.DataFrame({
	                    "Source": list(source_dist.keys()),
	                    "Count": list(source_dist.values()),
	                }).sort_values('Count', ascending=False)
	                st.dataframe(source_df, hide_index=True, width='stretch')
	            else:
	                st.write("No source data")

	        with col_year:
	            st.markdown("📅 Years")
	            year_dist = stats.get('year_distribution') or {}
	            known_years = {k: v for k, v in year_dist.items() if k != 'Unknown'}
	            if known_years:
	                year_df = pd.DataFrame({
	                    "Year": list(known_years.keys()),
	                    "Count": list(known_years.values()),
	                })
	                # Order chronologically by numeric value while leaving the
	                # displayed labels untouched. Labels that are not numeric
	                # become NaN and sort last instead of raising, which a plain
	                # astype(int) would do.
	                year_key = pd.to_numeric(year_df['Year'], errors='coerce')
	                year_df = year_df.loc[year_key.sort_values(kind='mergesort').index]
	                st.dataframe(year_df, hide_index=True, width='stretch')
	            else:
	                st.write("No year data")

	        with col_file:
	            st.markdown("📄 Files")
	            filename_dist = stats.get('filename_distribution')
	            if filename_dist:
	                # Most frequent first; ties keep their original order.
	                top_files = sorted(
	                    filename_dist.items(),
	                    key=lambda item: item[1],
	                    reverse=True,
	                )[:max_files]
	                file_df = pd.DataFrame({
	                    "File": [
	                        name[:max_name_len] + "..." if len(name) > max_name_len else name
	                        for name, _ in top_files
	                    ],
	                    "Count": [count for _, count in top_files],
	                })
	                st.dataframe(file_df, hide_index=True, width='stretch')
	            else:
	                st.write("No file data")

	    # Emit the closing tag for the container opened above.
	    st.markdown('</div>', unsafe_allow_html=True)