# ba_dashboard/src/components/chart_display.py
# Provenance: uploaded by paulokewunmi ("Upload 10 files", commit 0847744, verified)
import streamlit as st
from utils.visualization import create_radar_chart, create_bar_chart
def _render_eval_charts(evals, chart_type, key_prefix, empty_message):
    """
    Render one chart per evaluation name inside a tab strip.

    Args:
        evals (dict): Mapping of eval name -> metrics
        chart_type (str): 'radar' for radar charts, anything else for bar charts
        key_prefix (str): Prefix for each chart's unique Streamlit element key
        empty_message (str): Info message shown when there are no eval names
    """
    eval_names = list(evals.keys())
    if not eval_names:
        st.info(empty_message)
        return
    tabs = st.tabs(eval_names)
    for tab, eval_name in zip(tabs, eval_names):
        with tab:
            metrics = evals[eval_name]
            if chart_type == 'radar':
                chart = create_radar_chart(metrics, f"{eval_name}")
            else:
                chart = create_bar_chart(metrics, f"{eval_name}")
            # Unique key avoids Streamlit duplicate-element-ID errors when the
            # same eval name appears in multiple groups
            st.plotly_chart(chart, use_container_width=True, key=f"{key_prefix}{eval_name}")


def _render_journey_aiscript_tabs(data, chart_type, journey_prefix, aiscript_prefix, msg_suffix=""):
    """
    Render the paired 'Journey Evaluations' / 'AI Script Evaluations' tabs
    for one result set.

    Args:
        data (dict): Result set that may contain 'journeyEvals' and 'aiScriptEvals'
        chart_type (str): 'radar' or 'bar'
        journey_prefix (str): Chart key prefix for journey charts
        aiscript_prefix (str): Chart key prefix for AI script charts
        msg_suffix (str): Appended to the info messages (e.g. " for this group")
    """
    journey_tab, aiscript_tab = st.tabs(["Journey Evaluations", "AI Script Evaluations"])
    with journey_tab:
        if data.get("journeyEvals"):
            _render_eval_charts(
                data["journeyEvals"],
                chart_type,
                key_prefix=journey_prefix,
                empty_message=f"No journey evaluation metrics available{msg_suffix}.",
            )
        else:
            st.info(f"No journey evaluation data available{msg_suffix}.")
    with aiscript_tab:
        if data.get("aiScriptEvals"):
            _render_eval_charts(
                data["aiScriptEvals"],
                chart_type,
                key_prefix=aiscript_prefix,
                empty_message=f"No AI script evaluation metrics available{msg_suffix}.",
            )
        else:
            st.info(f"No AI script evaluation data available{msg_suffix}.")


def display_eval_results(eval_results, chart_type, group_by_thread=False):
    """
    Display evaluation results with tabs and charts.

    Args:
        eval_results (dict): Evaluation results
        chart_type (str): Type of chart to create ('radar' or 'bar')
        group_by_thread (bool): Whether results are grouped by thread instead of plot
    """
    if not eval_results:
        st.info("Select filters to view evaluation results.")
        return
    if "message" in eval_results:
        st.warning(eval_results["message"])
        return

    # Raw payload in a collapsible section, useful for debugging
    with st.expander("View Raw Data"):
        st.json(eval_results)

    is_dict = isinstance(eval_results, dict)

    # Case 1: results grouped by thread/plot, each group carrying both
    # journeyEvals and aiScriptEvals
    if is_dict and all(
        isinstance(val, dict) and "journeyEvals" in val and "aiScriptEvals" in val
        for val in eval_results.values()
    ):
        for group_name, group_data in eval_results.items():
            # Header reflects the grouping dimension (thread vs plot)
            st.subheader(f"Thread: {group_name}" if group_by_thread else f"Plot: {group_name}")
            _render_journey_aiscript_tabs(
                group_data,
                chart_type,
                journey_prefix=f"journey_{group_name}_",
                aiscript_prefix=f"aiscript_{group_name}_",
                msg_suffix=" for this group",
            )
            # Separator between groups
            st.markdown("---")
    # Case 2: AI script evaluation grouped by thread/plot with a flat
    # eval-name -> metrics mapping per group
    elif is_dict and all(
        isinstance(val, dict) and not ("journeyEvals" in val or "aiScriptEvals" in val)
        for val in eval_results.values()
    ):
        for group_name, group_data in eval_results.items():
            st.subheader(f"Thread: {group_name}" if group_by_thread else f"Plot: {group_name}")
            _render_eval_charts(
                group_data,
                chart_type,
                key_prefix=f"aiscript_{group_name}_",
                empty_message="No AI script evaluation metrics available for this group.",
            )
            # Separator between groups
            st.markdown("---")
    # Case 3: single ungrouped result set with top-level journeyEvals and aiScriptEvals
    elif is_dict and "journeyEvals" in eval_results and "aiScriptEvals" in eval_results:
        _render_journey_aiscript_tabs(
            eval_results,
            chart_type,
            journey_prefix="journey_",
            aiscript_prefix="aiscript_",
        )
    # Case 4: single ungrouped flat eval-name -> metrics mapping
    elif is_dict and not ("journeyEvals" in eval_results or "aiScriptEvals" in eval_results):
        _render_eval_charts(
            eval_results,
            chart_type,
            key_prefix="aiscript_direct_",
            empty_message="No AI script evaluation metrics available.",
        )
    else:
        st.error("Unrecognized result format")
def display_data_overview(runs_df, turns_df, ai_script_evals_df, journey_evals_df):
    """
    Render the Data Overview section: four tabs of raw tables.

    Args:
        runs_df (pd.DataFrame): Runs data
        turns_df (pd.DataFrame): Turns data
        ai_script_evals_df (pd.DataFrame): AI script evaluations data
        journey_evals_df (pd.DataFrame): Journey evaluations data
    """
    st.header("Data Overview")

    # (tab label, section title, dataframe, message when the frame is empty)
    sections = [
        ("Runs", "Evaluation Runs", runs_df, "No run data available."),
        ("Turns", "Turns", turns_df, "No turn data available."),
        ("AI Script Evals", "AI Script Evaluations", ai_script_evals_df,
         "No AI script evaluation data available."),
        ("Journey Evals", "Journey Evaluations", journey_evals_df,
         "No journey evaluation data available."),
    ]

    # Nothing at all to show: warn and offer troubleshooting guidance instead
    if all(df.empty for _, _, df, _ in sections):
        st.warning("No data available. Please check your connection to AWS Athena and ensure that the database and tables exist.")
        with st.expander("Troubleshooting Tips"):
            st.markdown("""
            ### Troubleshooting Steps:
            1. **AWS SSO Authentication**: Make sure you've run `aws sso login --profile your_profile` before starting the dashboard
            2. **AWS Region**: Verify that the region in your `.env` file matches the region where your Athena database is located
            3. **Athena Database and Tables**: Confirm that the database name and table names in your `.env` file are correct
            4. **AWS Permissions**: Ensure your AWS role has permissions to query Athena and access the S3 bucket for query results
            5. **Network Connectivity**: Check that you have network connectivity to AWS services
            6. **Check Logs**: Look at the application logs for more detailed error messages
            """)
        return

    # One tab per table; each tab shows the frame or an "empty" notice
    overview_tabs = st.tabs([label for label, _, _, _ in sections])
    for tab, (_, title, df, empty_msg) in zip(overview_tabs, sections):
        with tab:
            st.subheader(title)
            if df.empty:
                st.info(empty_msg)
            else:
                st.dataframe(df)
def display_documentation():
    """Render the collapsible dashboard documentation section."""
    # Documentation text kept in one named constant so the render call stays trivial
    doc_text = """
    ## Evaluation Dashboard Documentation
    This dashboard allows you to explore and visualize evaluation data from AI runs.
    ### Query Types
    1. **Plot Evaluation**: View metrics for a specific plot
    2. **Journey Evaluation**: View metrics for a specific journey, optionally grouped by plots or threads
    3. **AI Script Evaluation**: View metrics for a specific AI script, optionally grouped by plots or threads
    4. **Shared Evaluations**: View metrics for evaluations that are shared across all runs
    ### Filters
    - **Aggregation Type**: Choose how to aggregate metric scores (mean, median, etc.)
    - **Filter by Last N Days**: Only include evaluations from the last N days
    - **Runtime Evaluations Only**: Only include evaluations that were run during runtime (thread_id is not null)
    When this option is selected, results are grouped by thread ID instead of plot
    - **Chart Type**: Choose between radar charts and bar charts
    ### Data Overview
    The Data Overview section shows the raw data in tabular format.
    """
    with st.expander("Documentation"):
        st.markdown(doc_text)