first commit
Browse files
app.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from html import escape
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Use the full browser width for the data table.
st.set_page_config(layout="wide")

# Rendering rules for st.dataframe: the two numeric counters get an emoji
# suffix, and the three URL columns render as clickable "Open" links.
_downloads_column = st.column_config.NumberColumn("Downloads", format="%d 📥")
_likes_column = st.column_config.NumberColumn("Likes", format="%d ❤️")

column_config = {
    "Downloads": _downloads_column,
    "Likes": _likes_column,
    "Hugging Face URL": st.column_config.LinkColumn("Hugging Face URL", display_text="Open"),
    "Arxiv URL": st.column_config.LinkColumn("Arxiv URL", display_text="Open"),
    "PapersWithCode URL": st.column_config.LinkColumn("PapersWithCode URL", display_text="Open"),
}
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@st.cache_data
def load_data(file_path: str = 'HuggingFaceBenchmarkDatasetsWithTags - Copy of HuggingFaceBenchmarkDatasetsWithTags (1).csv') -> pd.DataFrame:
    """Load and clean the benchmark-dataset CSV.

    Args:
        file_path: Path to the CSV export. Defaults to the bundled file, so
            existing callers (``load_data()``) are unaffected.

    Returns:
        A DataFrame with parsed timestamps, numeric metric columns, empty
        strings normalized to NaN, and the oversized free-text columns
        removed.
    """
    data = pd.read_csv(file_path, na_values=['NA', ''])

    # Timestamps arrive as strings; coerce so malformed values become NaT
    # instead of raising.
    data['Created At'] = pd.to_datetime(data['Created At'], errors='coerce')
    data['Last Modified'] = pd.to_datetime(data['Last Modified'], errors='coerce')

    # Metric columns may contain stray text; coerce those cells to NaN.
    numeric_cols = ['Downloads', 'Likes', 'Total Examples', 'Dataset Size (bytes)']
    for col in numeric_cols:
        data[col] = pd.to_numeric(data[col], errors='coerce')

    # Normalize any remaining empty strings to NaN for consistent filtering.
    data.replace("", np.nan, inplace=True)

    # These free-text columns are too large to display. errors='ignore'
    # keeps this robust if a future CSV export omits one of them (the
    # original raised KeyError in that case).
    data = data.drop(columns=['Card Data', 'Model Card README'], errors='ignore')
    return data
|
| 38 |
+
|
| 39 |
+
def escape_html(val):
    """HTML-escape *val* when it is a string; pass anything else through unchanged."""
    if not isinstance(val, str):
        return val
    return escape(val)
|
| 41 |
+
|
| 42 |
+
# Materialize the cleaned dataset (cached across reruns by st.cache_data).
df = load_data()

st.title('Bench1k: LLM Benchmarks & Evals Database')
st.subheader('Explore 1,327+ benchmarks. By default, sorted by # of downloads.')
st.write("Use the sidebar to apply filters.")

# Free-text keyword box; consumed by the search filter applied further down.
search_query = st.text_input("Search benchmarks by keyword")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# --- Sidebar filter widgets ---
# Multiselects for the categorical columns; options are the distinct
# non-null values present in the loaded data.
task_ids = df['Task IDs'].dropna().unique().tolist()
selected_task_id = st.sidebar.multiselect('Filter by Task IDs', task_ids)

task_categories = df['Task Categories'].dropna().unique().tolist()
selected_task_category = st.sidebar.multiselect('Filter by Task Categories', task_categories)

licenses = df['Licenses'].dropna().unique().tolist()
selected_license = st.sidebar.multiselect('Filter by License', licenses)

# Range sliders default to the full (min, max) span of each numeric column,
# so an untouched slider selects the whole range.
# NOTE(review): this assumes each column holds at least one non-NaN value
# and min < max — int(NaN) raises and a zero-width slider range misbehaves;
# confirm when swapping in a new CSV export.
min_likes, max_likes = int(df['Likes'].min(skipna=True)), int(df['Likes'].max(skipna=True))
selected_likes = st.sidebar.slider('Filter by Likes', min_likes, max_likes, (min_likes, max_likes))

min_size, max_size = df['Dataset Size (bytes)'].min(skipna=True), df['Dataset Size (bytes)'].max(skipna=True)
selected_size = st.sidebar.slider('Filter by Dataset Size (bytes)', min_size, max_size, (min_size, max_size))

min_examples, max_examples = df['Total Examples'].min(skipna=True), df['Total Examples'].max(skipna=True)
selected_examples = st.sidebar.slider('Filter by Total Examples', min_examples, max_examples, (min_examples, max_examples))
|
| 68 |
+
|
| 69 |
+
# --- Apply the active filters, narrowing filtered_df step by step ---
filtered_df = df

# Free-text search: case-insensitive substring match across every string
# (object-dtype) column; NaN cells count as non-matches.
if search_query:
    search_cols = df.select_dtypes(include=[object]).columns
    text_match = filtered_df[search_cols].apply(
        lambda col: col.str.contains(search_query, case=False, na=False)
    )
    filtered_df = filtered_df[text_match.any(axis=1)]

# 'Task IDs' / 'Task Categories' hold comma-separated lists; a row matches
# when any selected value appears as an element of its list.
if selected_task_id:
    mask_task_id = filtered_df['Task IDs'].apply(
        lambda x: any(task_id.strip() in str(x).split(',') for task_id in selected_task_id)
    )
    filtered_df = filtered_df[mask_task_id]

if selected_task_category:
    mask_task_category = filtered_df['Task Categories'].apply(
        lambda x: any(category.strip() in str(x).split(',') for category in selected_task_category)
    )
    filtered_df = filtered_df[mask_task_category]

if selected_license:
    filtered_df = filtered_df[filtered_df['Licenses'].isin(selected_license)]

# BUG FIX: Series.between() evaluates to False for NaN, so the original
# unconditional filters (guarded by always-true `if <tuple>:` checks)
# silently dropped every row with a missing Likes / Size / Examples value
# even when the sliders were untouched. Only apply a range filter once the
# user has actually narrowed it from the full (min, max) span.
if selected_likes != (min_likes, max_likes):
    filtered_df = filtered_df[filtered_df['Likes'].between(selected_likes[0], selected_likes[1])]

if selected_size != (min_size, max_size):
    filtered_df = filtered_df[filtered_df['Dataset Size (bytes)'].between(selected_size[0], selected_size[1])]

if selected_examples != (min_examples, max_examples):
    filtered_df = filtered_df[filtered_df['Total Examples'].between(selected_examples[0], selected_examples[1])]
|
| 94 |
+
|
| 95 |
+
def clean_html_sensitive_content(val):
    """Strip *val* down to alphanumerics, spaces, hyphens and underscores.

    Non-string inputs (e.g. NaN) are returned untouched.
    """
    if not isinstance(val, str):
        return val
    keep = {' ', '-', '_'}
    return ''.join(ch for ch in val if ch.isalnum() or ch in keep)
|
| 99 |
+
|
| 100 |
+
# FIX: filtered_df is a boolean-mask slice of the cached df. Assigning into
# it directly raises SettingWithCopyWarning and risks mutating the
# st.cache_data-backed frame shared across reruns — take an explicit copy
# before sanitizing the display columns.
filtered_df = filtered_df.copy()
filtered_df['Task IDs'] = filtered_df['Task IDs'].apply(clean_html_sensitive_content)
filtered_df['Task Categories'] = filtered_df['Task Categories'].apply(clean_html_sensitive_content)

# Render the result with the emoji/link column formatting defined above.
st.dataframe(filtered_df, column_config=column_config, hide_index=True)

st.sidebar.info("Use the filters above to explore different aspects of the benchmark datasets.")
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@st.cache_data
def convert_df(df):
    """Serialize *df* to UTF-8 CSV bytes; cached so reruns reuse the result."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')


# Offer the complete (unfiltered) database as a CSV download.
csv = convert_df(df)

st.download_button(
    label="Download database as CSV",
    data=csv,
    file_name='bench1k_database_full.csv',
    mime='text/csv',
)
|