| import gradio as gr |
| import plotly.express as px |
| from pathlib import Path |
| import pandas as pd |
| import numpy as np |
| from langchain_openai import ChatOpenAI |
| from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent |
| from langchain.agents.agent_types import AgentType |
| from langchain_google_genai import ChatGoogleGenerativeAI |
| import plotly.graph_objects as go |
|
|
def explain_df(query, df):
    """Answer a natural-language question about ``df`` through an LLM agent.

    A Gemini chat model is wrapped in a LangChain pandas-dataframe agent,
    the agent is invoked with ``query``, and the ``'output'`` entry of the
    agent's response is returned.

    Args:
        query: The question forwarded to the agent.
        df: The pandas DataFrame the agent is built over.

    Returns:
        The value stored under ``'output'`` in the agent's response.
    """
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    # NOTE(review): allow_dangerous_code=True opts in to the agent running
    # model-generated Python. Confirm this is acceptable for wherever
    # `query` originates (e.g. untrusted UI users).
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        verbose=True,
        allow_dangerous_code=True,
    )
    return agent.invoke(query)['output']
|
|
| |
# Directory that contains this script; the CSV inputs are looked up here.
abs_path = Path(__file__).parents[0]
|
|
def parse_model_args(model_args):
    """Extract a model name from a comma-separated model-args string.

    Processing happens in two steps:

    1. If the string contains ``"deltazip"``, keep what follows its first
       occurrence up to the first comma, strip surrounding dots and turn
       every remaining dot into a slash.
    2. If the (possibly rewritten) string contains ``"espressor/"``, keep
       what follows it up to the first comma, strip surrounding dots, turn
       only the first dot into a slash and drop everything from the first
       underscore on. Otherwise keep the part before the first comma and
       remove any ``"pretrained="`` text.

    Args:
        model_args: The raw model-args string.

    Returns:
        The extracted model name.
    """
    if "deltazip" in model_args:
        after_marker = model_args.split("deltazip")[1]
        model_args = after_marker.split(",")[0].strip(".").replace(".", "/")

    if "espressor/" not in model_args:
        # Plain case: first comma-separated field minus the "pretrained=" key.
        return model_args.split(",")[0].replace("pretrained=", "")

    after_marker = model_args.split("espressor/")[1]
    repo_name = after_marker.split(",")[0].strip(".").replace(".", "/", 1)
    # Everything from the first underscore on is discarded.
    return repo_name.split("_")[0]
|
|
def parse_model_precision(model_args):
    """Derive a precision label from a model-args string.

    Args:
        model_args: The raw model-args string.

    Returns:
        ``"Default"`` when the string does not contain ``"espressor"``;
        ``"W8A8_int8"`` when it contains that exact tag; otherwise the text
        after the last underscore.
    """
    if "espressor" not in model_args:
        return "Default"
    # Checked explicitly because this tag itself contains an underscore and
    # would otherwise be cut down to its last piece by the split below.
    if 'W8A8_int8' in model_args:
        return 'W8A8_int8'
    return model_args.split("_")[-1]
|
|
| |
# Load the evaluation results and the performance benchmark results that
# sit next to this script.
df = pd.read_csv(abs_path / "eval_results.csv")
perf_df = pd.read_csv(abs_path / "perfbench_results.csv")

# Keep only rows whose metric is 'acc', at most one per (model, task) pair.
df = df[df['metric'] == 'acc'].drop_duplicates(subset=['model', 'task'])

# Scale the size column down by 1024**3 (presumably bytes -> GiB; confirm
# against the CSV producer).
df['model_physical_size'] = df['model_physical_size'] / 1024 / 1024 / 1024

# One row per model, one column per task.
df = df.pivot(
    index=['model', 'hf_name', 'model_physical_size'],
    columns='task',
    values='value',
).reset_index()

# Text after the last ':' in 'model' becomes the precision; text before the
# first ':' stays as the model name.
df['precision'] = df['model'].map(lambda tag: tag.rpartition(":")[2])
df['model'] = df['model'].map(lambda tag: tag.partition(":")[0])
# Row-wise mean over every column whose name contains 'task_'.
df['avg_acc'] = df.filter(like='task_').mean(axis=1)

# Drop the 'task_' text from column names for display.
df = df.rename(columns=lambda col: col.replace('task_', ''))

# Express every numeric column except the size as a percentage with two
# decimals; the size is only rounded.
numeric_columns = df.select_dtypes(include=[np.number]).columns.drop('model_physical_size')
df[numeric_columns] = df[numeric_columns].mul(100).round(2)
df['model_physical_size'] = df['model_physical_size'].round(2)

# Accuracy table joined with the performance numbers, keyed on 'hf_name'.
full_df = df.merge(perf_df, on='hf_name', how='left')
|
|
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 Efficient LLM Leaderboard
    """)
    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            # Every column that is not an identifier/metadata column is
            # offered as a selectable task. "plot_pareto" is a pseudo-option
            # that toggles the Pareto-frontier overlays.
            task_options = [
                col for col in df.columns
                if col not in ['model', 'hf_name', 'model_physical_size', 'precision']
            ]
            task_options.append("plot_pareto")
            with gr.Row():
                selected_tasks = gr.CheckboxGroup(choices=task_options, label="Select Tasks")
            with gr.Row():
                accuracy_plot = gr.Plot(label="Accuracy Plot")
                line_plot = gr.Plot(label="Average Accuracy vs Model Size")
            with gr.Row():
                throughput_line_plot = gr.Plot(label="Throughput vs Average Accuracy")
                latency_line_plot = gr.Plot(label="Latency vs Average Accuracy")
            with gr.Row():
                data_table = gr.Dataframe(value=df, label="Result Table")

            def _pareto_frontier(frame, x_col):
                """Return the rows of ``frame`` that set a new best accuracy.

                Rows are sorted by ``x_col`` ascending; a row is kept when it
                is the first one to reach a new running maximum of
                ``avg_accuracy``.
                """
                ordered = frame.sort_values(x_col)
                running_best = ordered['avg_accuracy'].cummax()
                return ordered.loc[running_best.drop_duplicates().index]

            def _pareto_trace(frontier, x_col):
                """Build the scatter trace that draws ``frontier`` along ``x_col``."""
                return go.Scatter(
                    x=frontier[x_col],
                    y=frontier['avg_accuracy'],
                    mode='lines+markers',
                    name='Pareto Frontier',
                )

            def update_outputs(selected_tasks):
                """Recompute the table and the four plots for the chosen tasks.

                Args:
                    selected_tasks: Values ticked in the checkbox group. May
                        contain the pseudo-option ``"plot_pareto"``.

                Returns:
                    A 5-tuple ``(table, bar_fig, size_fig, throughput_fig,
                    latency_fig)`` in the order of the listener's ``outputs``.
                    When no real task is selected the table holds only the
                    ``model``/``precision`` columns and all figures are
                    ``None``.
                """
                chosen = selected_tasks or []
                plot_pareto = "plot_pareto" in chosen
                # Build a new list instead of mutating the caller's list.
                tasks = [task for task in chosen if task != "plot_pareto"]
                if not tasks:
                    # One value per registered output (five). Also covers the
                    # case where only "plot_pareto" is ticked, which would
                    # otherwise average over zero columns.
                    return df[['model', 'precision']], None, None, None, None

                # Copy so that adding a column does not write into a slice
                # of the module-level frame.
                filtered_df = df[['model', 'precision', 'model_physical_size', 'hf_name'] + tasks].copy()
                filtered_df['avg_accuracy'] = filtered_df[tasks].mean(axis=1)

                title = f'tasks: {", ".join(tasks)}'
                bar_fig = px.bar(filtered_df, x='model', y='avg_accuracy', color='precision', barmode='group')
                line_fig = px.line(filtered_df, x='model_physical_size', y='avg_accuracy', color='model', symbol='precision')
                if plot_pareto:
                    line_fig.add_trace(_pareto_trace(
                        _pareto_frontier(filtered_df, 'model_physical_size'),
                        'model_physical_size',
                    ))
                bar_fig.update_layout(title=title)
                line_fig.update_layout(title=title)

                with_perf_df = filtered_df.merge(perf_df, left_on='hf_name', right_on='hf_name', how='left')
                throughput_line_fig = px.line(with_perf_df, x='output_throughput', y='avg_accuracy', color='model', symbol='precision')
                latency_line_fig = px.line(with_perf_df, x="avg_e2e_latency", y='avg_accuracy', color='model', symbol='precision')
                if plot_pareto:
                    latency_line_fig.add_trace(_pareto_trace(
                        _pareto_frontier(with_perf_df, 'avg_e2e_latency'),
                        'avg_e2e_latency',
                    ))
                return with_perf_df, bar_fig, line_fig, throughput_line_fig, latency_line_fig

            selected_tasks.change(
                fn=update_outputs,
                inputs=selected_tasks,
                outputs=[data_table, accuracy_plot, line_plot, throughput_line_plot, latency_line_plot]
            )
        with gr.TabItem("Find Model"):
            query_input = gr.Textbox(label="Enter your query", placeholder="Enter your query here")
            response_output = gr.Textbox(label="Response", interactive=False)
            # Free-text questions are answered by the LLM agent over `df`.
            query_input.submit(
                fn=lambda query: explain_df(query, df),
                inputs=query_input,
                outputs=response_output
            )

if __name__ == "__main__":
    # NOTE(review): share=True requests a public Gradio link while the
    # "Find Model" tab forwards user text to an agent created with
    # allow_dangerous_code=True. Confirm this exposure is intended.
    demo.launch(share=True)