Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
# Per-request tool log backing every chart in this app. The rest of the file
# reads its `tool`, `error`, `prompt_response` and `request_month_year_week`
# columns.
tools = pd.read_csv(filepath_or_buffer="./data/tools.csv")

# Root Blocks container; the layout is attached to it in the `with demo:`
# section further down and it is launched at the end of the script.
demo = gr.Blocks()
# Tool names included in the dashboard. The list is used both to filter the
# rows of `tools` and as the choices of the tool dropdown, so its order is the
# order shown to the user.
INC_TOOLS = [
    "prediction-online",
    "prediction-offline",
    "claude-prediction-online",
    "claude-prediction-offline",
    "prediction-offline-sme",
    "prediction-online-sme",
    "prediction-request-rag",
    "prediction-request-reasoning",
    "prediction-url-cot-claude",
    "prediction-request-rag-claude",
    "prediction-request-reasoning-claude",
]
def set_error(row):
    """Return a strict boolean error flag for one row of the tools table.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes,
            e.g. the Series handed over by ``DataFrame.apply(..., axis=1)``.

    Returns:
        bool: ``row.error`` coerced to ``bool`` when it already holds a
        true/false value. Otherwise ``True`` when ``prompt_response`` is
        missing (None/NaN) or empty, and ``False`` when a response is present.
    """
    flag = row.error
    # Rule out missing values first: NaN never equals True/False, and pd.NA
    # cannot be used in a membership test at all.
    if not pd.isna(flag) and flag in (True, False):
        # 1/0 and numpy booleans compare equal to True/False; normalise them so
        # downstream group-by labels are always the Python bools.
        return bool(flag)

    response = row.prompt_response
    # A blank CSV cell is read as NaN, which is truthy, so a plain
    # `not response` would wrongly report "no error" for a missing response.
    return bool(pd.isna(response)) or not response
def get_error_data():
    """Count failed and successful requests per tool and week.

    Reads the module-level ``tools`` frame, keeps the rows whose ``tool`` is
    listed in ``INC_TOOLS``, derives a boolean ``error`` flag per row with
    ``set_error`` and counts rows per (tool, week, flag).

    Returns:
        pandas.DataFrame: One row per (``tool``, ``request_month_year_week``)
        with the count columns ``False`` and ``True``, plus ``error_perc``
        (share of ``True`` in percent) and ``total_requests``.
    """
    # Work on a copy: adding a column to a boolean-mask slice of `tools`
    # triggers pandas' SettingWithCopyWarning.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    # Force real booleans so the unstacked column labels are exactly
    # False/True regardless of how the CSV column was parsed.
    tools_inc['error'] = tools_inc.apply(set_error, axis=1).astype(bool)

    counts = (
        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        # unstack() only creates columns for outcomes that occur in the data;
        # guarantee both so the lookups below cannot raise KeyError.
        .reindex(columns=[False, True])
        .fillna(0)
    )
    error = counts.reset_index()
    error['error_perc'] = (error[True] / (error[False] + error[True])) * 100
    error['total_requests'] = error[False] + error[True]
    return error
def get_error_data_all(error):
    """Collapse the per-tool error counts into one row per week.

    Args:
        error: Frame with the columns ``request_month_year_week``,
            ``total_requests``, ``False`` and ``True``.

    Returns:
        pandas.DataFrame: Weekly sums of the three count columns plus
        ``error_perc`` rounded to 4 decimals. All column labels are converted
        to strings, so the counts are exposed as ``"False"`` and ``"True"``.
    """
    weekly = (
        error.groupby('request_month_year_week')
        .agg({'total_requests': 'sum', False: 'sum', True: 'sum'})
        .reset_index()
    )
    failure_share = weekly[True] / weekly['total_requests']
    weekly['error_perc'] = (failure_share * 100).map(lambda pct: round(pct, 4))
    # Stringify the labels last, after the boolean-keyed lookups above.
    weekly.columns = weekly.columns.astype(str)
    return weekly
# Aggregates are computed once at start-up and shared by every chart below.
error = get_error_data()
error_all = get_error_data_all(error)


def _prepare_plot_data(frame):
    """Return an independent, plot-ready copy of a slice of ``error``.

    The copy has string column labels (the ``False``/``True`` count columns
    become ``"False"``/``"True"``) and ``error_perc`` rounded to 4 decimals.
    """
    # Copy first: `frame` is a boolean-mask slice, and writing into it directly
    # triggers pandas' SettingWithCopyWarning.
    plot_data = frame.copy()
    plot_data.columns = plot_data.columns.astype(str)
    plot_data['error_perc'] = plot_data['error_perc'].round(4)
    return plot_data


def update_tool_plot(selected_tool):
    """Build the weekly error chart update for ``selected_tool``."""
    # The output component is a BarPlot, so return the matching type.
    return gr.BarPlot(
        title="Error Percentage",
        x_title="Week",
        y_title="Error Percentage",
        x="request_month_year_week",
        y="error_perc",
        value=_prepare_plot_data(error[error['tool'] == selected_tool]),
    )


def update_week_plot(selected_week):
    """Build the per-tool error chart update for ``selected_week``."""
    return gr.BarPlot(
        title="Error Percentage",
        x_title="Tool",
        y_title="Error Percentage",
        x="tool",
        y="error_perc",
        value=_prepare_plot_data(
            error[error['request_month_year_week'] == selected_week]
        ),
    )


with demo:
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            # --- Overall error rate per week -------------------------------
            with gr.Row():
                gr.Markdown("# Plot showing overall error")
            with gr.Row():
                with gr.Column():
                    gr.BarPlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=800,
                        show_label=True,
                        interactive=True,
                        show_actions_button=True,
                        tooltip=["request_month_year_week", "error_perc"],
                    )

            # --- Error rate per week for one selected tool -----------------
            with gr.Row():
                gr.Markdown("# Plot showing error by tool")
            with gr.Row():
                sel_tool = gr.Dropdown(
                    label="Select a tool",
                    choices=INC_TOOLS,
                    value=INC_TOOLS[0],
                )
            with gr.Row():
                plot_tool_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["request_month_year_week", "error_perc"],
                    width=800,
                )

            # --- Error rate per tool for one selected week -----------------
            with gr.Row():
                gr.Markdown("# Plot showing error by week")
            with gr.Row():
                # Largest week label first, so the default selection is the
                # top entry of the dropdown.
                choices = sorted(
                    error['request_month_year_week'].unique().tolist(),
                    reverse=True,
                )
                sel_week = gr.Dropdown(
                    label="Select a week",
                    choices=choices,
                    # Guard against an empty data set instead of raising
                    # IndexError while the page is being built.
                    value=choices[0] if choices else None,
                )
            with gr.Row():
                plot_week_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["tool", "error_perc"],
                    width=800,
                )

            sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)

            # `.change` only fires on user interaction, so without these the
            # two plots would stay empty under their pre-selected dropdowns.
            demo.load(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            demo.load(update_week_plot, inputs=sel_week, outputs=plot_week_error)

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark"):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

demo.queue(default_concurrency_limit=40).launch()