import re

import streamlit as st

from draw_utils import PAGE_MARKDOWN, PAGE_INFO, LENGTHS
from draw_utils import load_results, style_dataframe
# Page-level configuration is issued before any other Streamlit call in
# this script, so it stays first.
st.set_page_config(
    page_title="Leaderboard App",
    layout="wide",
)

# PAGE_MARKDOWN is injected with HTML rendering enabled; it comes from
# draw_utils, not from user input.
st.markdown(PAGE_MARKDOWN, unsafe_allow_html=True)
def _is_valid_regex(pattern):
    """Return True if *pattern* compiles as a regular expression."""
    try:
        re.compile(pattern)
    except re.error:
        return False
    return True


def draw_leaderboard():
    """Render the leaderboard page.

    Loads the results table via ``load_results()``, draws the page header
    and a model-name search box, then creates one tab per task
    (``avg`` and ``qa1`` .. ``qa10``). Each tab shows the rows of that
    task, restricted to the ``model_name``, ``<=32k``, ``<=128k`` and
    ``LENGTHS`` columns and optionally filtered by the search term.

    The search is case-insensitive. The term is treated as a regular
    expression when it compiles as one; otherwise it is matched as a
    literal substring, so input such as ``(`` or ``c++`` does not raise.
    """
    df = load_results()
    tasks = ['avg'] + [f"qa{i}" for i in range(1, 11)]
    columns = ["model_name", "<=32k", "<=128k"] + LENGTHS

    # NOTE(review): the non-ASCII characters in this title look like
    # mis-decoded emoji; the literal is kept as found -- confirm against
    # the intended title and restore the original characters.
    st.title("πππͺ‘πβ BABILong Leaderboard π")
    st.markdown(PAGE_INFO)
    st.subheader("Evaluation results:")
    st.text('Each tab corresponds to a task, avg - averaged scores over qa1-5 tasks.')
    st.markdown('Predictions of all evaluated models: '
                '[BABILong evals](https://huggingface.co/datasets/RMT-team/babilong_evals)')

    search_term = st.text_input("Search models:", "")
    # Decide once, outside the tab loop, whether the user's text can be
    # used as a regex; an invalid pattern falls back to literal matching.
    use_regex = bool(search_term) and _is_valid_regex(search_term)

    row_height = 35
    for task, tab in zip(tasks, st.tabs(tasks)):
        with tab:
            task_df = df[df.task == task][columns]
            if search_term:
                # na=False keeps the mask strictly boolean when a model
                # name is missing, so boolean indexing cannot fail on NA.
                matches = task_df['model_name'].str.contains(
                    search_term,
                    case=False,
                    regex=use_regex,
                    na=False,
                )
                task_df = task_df[matches]
            task_df = task_df.reset_index(drop=True)

            # One extra row of height for the table header.
            height = (len(task_df) + 1) * row_height
            styled_df = style_dataframe(task_df).format(precision=1)
            st.dataframe(
                styled_df,
                width=1030,
                height=height,
            )
# Script entry point: draw the page when this file is run as __main__.
if __name__ == "__main__":
    draw_leaderboard()