eustlb's picture
eustlb HF Staff
app prototype
94b0fbf
raw
history blame
2.03 kB
import pandas as pd
import gradio as gr
# Load the per-model test results.
# NOTE(review): the code below reads the 'model', 'test_type' and
# 'number_of_tests' columns; the full CSV schema is not visible here.
model_test_results = pd.read_csv('test_results_by_type.csv')

# Map every model that has at least one failed test to its failure count.
# first() keeps the first 'failed' row per model -- presumably there is only
# one such row per model; confirm against the CSV producer.
is_failed_row = model_test_results['test_type'] == 'failed'
has_failures = is_failed_row & (model_test_results['number_of_tests'] > 0)
failed_models_counts = (
    model_test_results[has_failures]
    .groupby('model')['number_of_tests']
    .first()
    .to_dict()
)


def _label_model(name):
    """Return *name* tagged with ❌ and its failure count, or ✅ if none failed."""
    if name in failed_models_counts:
        return f"{name} ❌ ({failed_models_counts[name]})"
    return f"{name} ✅"


# Add ❌ and failure count to model names that have failures, ✅ for passing
# models. Only the 'model' column is needed, so map it element-wise instead of
# applying a function to whole rows; this also works when the table is empty.
model_test_results['model'] = model_test_results['model'].map(_label_model)

# Put the 'failed' rows first, largest failure count on top, followed by all
# remaining rows in their original order. The mask computed above is still
# valid: relabelling changed neither the index nor the 'test_type' column.
failed_tests = model_test_results[is_failed_row].sort_values(
    'number_of_tests', ascending=False
)
other_tests = model_test_results[~is_failed_row]

# Concatenate the dataframes
model_test_results = pd.concat([failed_tests, other_tests])
# Build the Gradio UI: a heading followed by one bar chart of per-model results.
with gr.Blocks() as test_results_viz:
    gr.Markdown("# Test Results by Model")

    # Derive the x-axis order from a sorted copy of the table: ascending
    # 'conclusion', then descending 'test_type' and 'number_of_tests'; each
    # model keeps the position of its first occurrence.
    # NOTE(review): this relies on a 'conclusion' column being present in the
    # loaded CSV -- nothing else in view references it; confirm.
    ordered_rows = model_test_results.sort_values(
        by=['conclusion', 'test_type', 'number_of_tests'],
        ascending=[True, False, False],
    )
    model_order = ordered_rows['model'].drop_duplicates().tolist()

    # Options for Gradio's BarPlot, one bar per model, coloured by test type.
    # NOTE(review): which of these keywords are accepted depends on the
    # installed Gradio version -- confirm against the pinned release.
    plot_options = {
        "x": "model",
        "y": "number_of_tests",  # Base layer
        "color": "test_type",  # Color by pass/fail status
        "color_map": {
            "passed": "#008550",
            "skipped": "#F0B702",
            "failed": "#8B1710",
        },
        "title": "Test Results by Model",
        "x_title": "Model",
        "y_title": "Number of Tests",
        "height": 600,
        "width": 1000,
        "x_label_angle": 45,  # Rotate x-axis labels by 45 degrees
        "x_order": model_order,  # Set custom order of x-axis
    }
    test_results_plot = gr.BarPlot(model_test_results, **plot_options)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    test_results_viz.launch()