Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from utils.visualization import create_radar_chart, create_bar_chart | |
def _render_eval_tabs(evals, chart_type, key_prefix, empty_message):
    """
    Render one Streamlit tab per evaluation name, each holding a single chart.

    Args:
        evals (dict): Mapping of eval name -> metrics payload for the chart helpers
        chart_type (str): 'radar' for a radar chart, anything else for a bar chart
        key_prefix (str): Prefix for the plotly chart widget key; combined with the
            eval name so every chart on the page has a unique Streamlit key
        empty_message (str): Info message shown when `evals` has no entries
    """
    eval_names = list(evals.keys())
    if not eval_names:
        st.info(empty_message)
        return
    tabs = st.tabs(eval_names)
    for tab, eval_name in zip(tabs, eval_names):
        with tab:
            metrics = evals[eval_name]
            if chart_type == 'radar':
                chart = create_radar_chart(metrics, eval_name)
            else:
                chart = create_bar_chart(metrics, eval_name)
            # Unique key avoids Streamlit duplicate-element-ID errors when the
            # same eval name appears under multiple groups on one page.
            st.plotly_chart(chart, use_container_width=True, key=f"{key_prefix}_{eval_name}")


def display_eval_results(eval_results, chart_type, group_by_thread=False):
    """
    Display evaluation results with tabs and charts.

    Handles four result shapes:
      1. dict of group -> {"journeyEvals": ..., "aiScriptEvals": ...}
      2. dict of group -> flat {eval name -> metrics} (AI script evals only)
      3. flat {"journeyEvals": ..., "aiScriptEvals": ...}
      4. flat {eval name -> metrics}

    Args:
        eval_results (dict): Evaluation results
        chart_type (str): Type of chart to create ('radar' or 'bar')
        group_by_thread (bool): Whether results are grouped by thread instead of plot
    """
    if not eval_results:
        st.info("Select filters to view evaluation results.")
        return
    if "message" in eval_results:
        st.warning(eval_results["message"])
        return

    # Display raw data in a collapsible section
    with st.expander("View Raw Data"):
        st.json(eval_results)

    # Case 1: Results grouped by thread/plot with journeyEvals and aiScriptEvals
    if isinstance(eval_results, dict) and all(isinstance(val, dict) and "journeyEvals" in val and "aiScriptEvals" in val for val in eval_results.values()):
        for group_name, group_data in eval_results.items():
            # Header reflects the grouping dimension chosen by the caller
            if group_by_thread:
                st.subheader(f"Thread: {group_name}")
            else:
                st.subheader(f"Plot: {group_name}")
            journey_tab, aiscript_tab = st.tabs(["Journey Evaluations", "AI Script Evaluations"])
            with journey_tab:
                if group_data.get("journeyEvals"):
                    _render_eval_tabs(
                        group_data["journeyEvals"], chart_type,
                        f"journey_{group_name}",
                        "No journey evaluation metrics available for this group.",
                    )
                else:
                    st.info("No journey evaluation data available for this group.")
            with aiscript_tab:
                if group_data.get("aiScriptEvals"):
                    _render_eval_tabs(
                        group_data["aiScriptEvals"], chart_type,
                        f"aiscript_{group_name}",
                        "No AI script evaluation metrics available for this group.",
                    )
                else:
                    st.info("No AI script evaluation data available for this group.")
            # Separator between plots/threads
            st.markdown("---")

    # Case 2: AI Script Evaluation grouped by thread/plot with nested eval structure
    elif isinstance(eval_results, dict) and all(isinstance(val, dict) and not ("journeyEvals" in val or "aiScriptEvals" in val) for val in eval_results.values()):
        for group_name, group_data in eval_results.items():
            if group_by_thread:
                st.subheader(f"Thread: {group_name}")
            else:
                st.subheader(f"Plot: {group_name}")
            _render_eval_tabs(
                group_data, chart_type,
                f"aiscript_{group_name}",
                "No AI script evaluation metrics available for this group.",
            )
            # Separator between groups
            st.markdown("---")

    # Case 3: Journey Evaluation with group_by_plots=False or other query types with flat structure
    elif isinstance(eval_results, dict) and "journeyEvals" in eval_results and "aiScriptEvals" in eval_results:
        journey_tab, aiscript_tab = st.tabs(["Journey Evaluations", "AI Script Evaluations"])
        with journey_tab:
            if eval_results["journeyEvals"]:
                _render_eval_tabs(
                    eval_results["journeyEvals"], chart_type,
                    "journey",
                    "No journey evaluation metrics available.",
                )
            else:
                st.info("No journey evaluation data available.")
        with aiscript_tab:
            if eval_results["aiScriptEvals"]:
                _render_eval_tabs(
                    eval_results["aiScriptEvals"], chart_type,
                    "aiscript",
                    "No AI script evaluation metrics available.",
                )
            else:
                st.info("No AI script evaluation data available.")

    # Case 4: AI Script Evaluation with group_by_plots=False
    elif isinstance(eval_results, dict) and not ("journeyEvals" in eval_results or "aiScriptEvals" in eval_results):
        _render_eval_tabs(
            eval_results, chart_type,
            "aiscript_direct",
            "No AI script evaluation metrics available.",
        )

    else:
        st.error("Unrecognized result format")
def display_data_overview(runs_df, turns_df, ai_script_evals_df, journey_evals_df):
    """
    Render the Data Overview section: one tab of raw tabular data per dataset.

    Args:
        runs_df (pd.DataFrame): Runs data
        turns_df (pd.DataFrame): Turns data
        ai_script_evals_df (pd.DataFrame): AI script evaluations data
        journey_evals_df (pd.DataFrame): Journey evaluations data
    """
    st.header("Data Overview")

    all_empty = (
        runs_df.empty
        and turns_df.empty
        and ai_script_evals_df.empty
        and journey_evals_df.empty
    )
    if all_empty:
        st.warning("No data available. Please check your connection to AWS Athena and ensure that the database and tables exist.")
        # Surface common setup problems so users can self-diagnose
        with st.expander("Troubleshooting Tips"):
            st.markdown("""
            ### Troubleshooting Steps:
            1. **AWS SSO Authentication**: Make sure you've run `aws sso login --profile your_profile` before starting the dashboard
            2. **AWS Region**: Verify that the region in your `.env` file matches the region where your Athena database is located
            3. **Athena Database and Tables**: Confirm that the database name and table names in your `.env` file are correct
            4. **AWS Permissions**: Ensure your AWS role has permissions to query Athena and access the S3 bucket for query results
            5. **Network Connectivity**: Check that you have network connectivity to AWS services
            6. **Check Logs**: Look at the application logs for more detailed error messages
            """)
        return

    # (tab label, section subheader, dataframe, message shown when empty)
    sections = [
        ("Runs", "Evaluation Runs", runs_df, "No run data available."),
        ("Turns", "Turns", turns_df, "No turn data available."),
        ("AI Script Evals", "AI Script Evaluations", ai_script_evals_df, "No AI script evaluation data available."),
        ("Journey Evals", "Journey Evaluations", journey_evals_df, "No journey evaluation data available."),
    ]
    overview_tabs = st.tabs([label for label, _, _, _ in sections])
    for tab, (_, subheader, frame, empty_msg) in zip(overview_tabs, sections):
        with tab:
            st.subheader(subheader)
            if frame.empty:
                st.info(empty_msg)
            else:
                st.dataframe(frame)
def display_documentation():
    """Show the collapsible documentation panel for the dashboard."""
    doc_text = """
    ## Evaluation Dashboard Documentation
    This dashboard allows you to explore and visualize evaluation data from AI runs.
    ### Query Types
    1. **Plot Evaluation**: View metrics for a specific plot
    2. **Journey Evaluation**: View metrics for a specific journey, optionally grouped by plots or threads
    3. **AI Script Evaluation**: View metrics for a specific AI script, optionally grouped by plots or threads
    4. **Shared Evaluations**: View metrics for evaluations that are shared across all runs
    ### Filters
    - **Aggregation Type**: Choose how to aggregate metric scores (mean, median, etc.)
    - **Filter by Last N Days**: Only include evaluations from the last N days
    - **Runtime Evaluations Only**: Only include evaluations that were run during runtime (thread_id is not null)
    When this option is selected, results are grouped by thread ID instead of plot
    - **Chart Type**: Choose between radar charts and bar charts
    ### Data Overview
    The Data Overview section shows the raw data in tabular format.
    """
    with st.expander("Documentation"):
        st.markdown(doc_text)