Spaces:
Build error
Build error
| # matplotlib.use('macosx') | |
| import gradio as gr | |
| import plotly.graph_objects as go | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from get_data_info import plot_data, tab_data | |
def create_data_interface(df):
    """Build a Gradio ``Dataframe`` component showing every row of *df*.

    The first column is declared with datatype ``"str"`` and each remaining
    column with datatype ``"number"``.
    """
    column_names = list(df.columns)
    # One "str" entry, then "number" for every further column.
    column_types = ["str", *["number"] * (len(column_names) - 1)]
    rows = df.values.tolist()
    return gr.components.Dataframe(
        value=rows,
        headers=column_names,
        datatype=column_types,
    )
def plot_radar_chart(df, attributes, category_name):
    """Return a Plotly figure with one filled polar trace per row of *df*.

    For each row, the trace is named after the row's ``'Model'`` value, its
    radial values are the row's entries for *attributes*, and *attributes*
    themselves are the angular labels. The figure is titled with
    *category_name*, shows a legend, and uses a radial axis fixed to 0-100.
    """
    figure = go.Figure()
    for _, record in df.iterrows():
        model_name = record['Model']
        scores = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=scores,
            theta=attributes,
            fill='toself',
            name=model_name,
        )
        figure.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 100])
    figure.update_layout(
        title=f"{category_name}",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return figure
def create_data_interface_for_aggregated(df, category_name):
    """Return the radar chart for *df*, titled *category_name*.

    Every column after the first is used as a chart attribute; the selected
    columns are printed before plotting.
    """
    axis_columns = df.columns[1:]
    print(f"attributes: {axis_columns}")
    return plot_radar_chart(df, axis_columns, category_name)
def reindex_cols(fix_cols, df):
    """Return the columns of *df* that are named in *fix_cols*, in *fix_cols* order.

    Args:
        fix_cols: Iterable of column names giving the desired order.
        df: DataFrame to select from.

    Returns:
        ``df`` restricted to the names from *fix_cols* that exist in
        ``df.columns``; names that are not columns of *df* are skipped.
    """
    # Build the lookup once instead of re-materialising the column list for
    # every candidate name.
    present = set(df.columns)
    task_col = [subtask for subtask in fix_cols if subtask in present]
    return df[task_col]
def _add_radar_row(frames, wanted_keys):
    """Add one ``gr.Row`` with a radar plot for each wanted entry of *frames*.

    Entries are visited in the iteration order of *frames*; only those whose
    key is in *wanted_keys* are plotted. Call this inside the ``gr.Blocks``
    context the row should belong to.
    """
    with gr.Row():
        for key, df in frames.items():
            if key not in wanted_keys:
                continue
            # Empty-string cells are replaced with 0 before plotting.
            # ``replace`` already returns a new frame, so no extra copy is made.
            new_df = df.replace('', 0)
            print(f"{key}: \n{new_df}")
            gr.Plot(create_data_interface_for_aggregated(new_df, key))


def launch_gradio(df1, df2):
    """Build the leaderboard page and launch it.

    The page shows the ``TITLE`` / ``INTRODUCTION_TEXT`` header (module-level
    names read at call time), then two rows of radar plots taken from *df1*,
    then one tab per entry of *df2* holding that entry's table.

    Args:
        df1: Mapping from category name to DataFrame, used for the radar plots.
            Only the four category names listed below are plotted.
        df2: Mapping from tab name to DataFrame, shown as tables.
    """
    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        _add_radar_row(df1, ("Overall", "Basic Information Retrieval"))
        _add_radar_row(
            df1,
            ("Legal Foundation Inference", "Complex Legal Application"),
        )

        for key, df in df2.items():
            with gr.Tab(key):
                create_data_interface(df)

    demo.launch()
# Constants
# Defined at module level (not under the __main__ guard) so that launch_gradio
# can resolve them when this module is imported rather than run as a script.
# NOTE(review): the leading characters of TITLE and of each INTRODUCTION_TEXT
# line look like mis-decoded emoji; the text is kept as found - restore the
# intended characters from the original file.
TITLE = '<h1 align="center" id="space-title">βοΈ LAiW Leaderboard</h1>'
INTRODUCTION_TEXT = """π The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.
π‘ Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.
π Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
π For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
"""


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()

    scheduler = BackgroundScheduler()
    # Hand the scheduler the callable plus its keyword arguments. Writing
    # launch_gradio(...) here would run the app during argument evaluation
    # and pass its return value (None - launch_gradio has no return
    # statement) to add_job instead of a job function.
    # NOTE(review): the hourly job reuses the df1/df2 loaded above and starts
    # another launch; confirm whether it should reload the data instead.
    scheduler.add_job(
        launch_gradio,
        "interval",
        seconds=3600,
        kwargs={"df1": df1, "df2": df2},
    )
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)