Spaces:
Sleeping
Sleeping
Adding application
Browse files- app.py +3 -0
- public/datasets/models_scores.csv +0 -0
- requirements.txt +57 -0
- src/__init__.py +1 -0
- src/__pycache__/__init__.cpython-38.pyc +0 -0
- src/__pycache__/display.cpython-38.pyc +0 -0
- src/__pycache__/load_data.cpython-38.pyc +0 -0
- src/__pycache__/plot.cpython-38.pyc +0 -0
- src/content.py +1 -0
- src/display.py +80 -0
- src/load_data.py +26 -0
- src/plot.py +102 -0
app.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.display import display_app
|
| 2 |
+
|
| 3 |
+
# Script entry point: build and render the leaderboard visualization page.
display_app()
|
public/datasets/models_scores.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
altair==5.2.0
|
| 2 |
+
attrs==23.2.0
|
| 3 |
+
backports.zoneinfo==0.2.1
|
| 4 |
+
blinker==1.7.0
|
| 5 |
+
cachetools==5.3.3
|
| 6 |
+
certifi==2024.2.2
|
| 7 |
+
charset-normalizer==3.3.2
|
| 8 |
+
click==8.1.7
|
| 9 |
+
contourpy==1.1.1
|
| 10 |
+
cycler==0.12.1
|
| 11 |
+
fonttools==4.49.0
|
| 12 |
+
gitdb==4.0.11
|
| 13 |
+
GitPython==3.1.42
|
| 14 |
+
idna==3.6
|
| 15 |
+
importlib-metadata==7.0.1
|
| 16 |
+
importlib_resources==6.1.2
|
| 17 |
+
Jinja2==3.1.3
|
| 18 |
+
jsonschema==4.21.1
|
| 19 |
+
jsonschema-specifications==2023.12.1
|
| 20 |
+
kiwisolver==1.4.5
|
| 21 |
+
markdown-it-py==3.0.0
|
| 22 |
+
MarkupSafe==2.1.5
|
| 23 |
+
matplotlib==3.7.5
|
| 24 |
+
mdurl==0.1.2
|
| 25 |
+
numpy==1.24.4
|
| 26 |
+
packaging==23.2
|
| 27 |
+
pandas==2.0.3
|
| 28 |
+
pillow==10.2.0
|
| 29 |
+
pkgutil_resolve_name==1.3.10
|
| 30 |
+
plotly==5.19.0
|
| 31 |
+
protobuf==4.25.3
|
| 32 |
+
pyarrow==15.0.0
|
| 33 |
+
pydeck==0.8.1b0
|
| 34 |
+
Pygments==2.17.2
|
| 35 |
+
pyparsing==3.1.1
|
| 36 |
+
python-dateutil==2.9.0.post0
|
| 37 |
+
python-decouple==3.8
|
| 38 |
+
pytz==2024.1
|
| 39 |
+
referencing==0.33.0
|
| 40 |
+
requests==2.31.0
|
| 41 |
+
rich==13.7.1
|
| 42 |
+
rpds-py==0.18.0
|
| 43 |
+
six==1.16.0
|
| 44 |
+
smmap==5.0.1
|
| 45 |
+
streamlit==1.31.1
|
| 46 |
+
streamlit-aggrid==0.3.4.post3
|
| 47 |
+
tenacity==8.2.3
|
| 48 |
+
toml==0.10.2
|
| 49 |
+
toolz==0.12.1
|
| 50 |
+
tornado==6.4
|
| 51 |
+
typing_extensions==4.10.0
|
| 52 |
+
tzdata==2024.1
|
| 53 |
+
tzlocal==5.2
|
| 54 |
+
urllib3==2.2.1
|
| 55 |
+
validators==0.22.0
|
| 56 |
+
watchdog==4.0.0
|
| 57 |
+
zipp==3.17.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# src/__init__.py
|
src/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (140 Bytes). View file
|
|
|
src/__pycache__/display.cpython-38.pyc
ADDED
|
Binary file (2.25 kB). View file
|
|
|
src/__pycache__/load_data.cpython-38.pyc
ADDED
|
Binary file (1.08 kB). View file
|
|
|
src/__pycache__/plot.cpython-38.pyc
ADDED
|
Binary file (2.45 kB). View file
|
|
|
src/content.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Nothing for now
|
src/display.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from src.load_data import load_dataframe, sort_by
|
| 3 |
+
from src.plot import plot_radar_chart_index, plot_radar_chart_name
|
| 4 |
+
from st_aggrid import GridOptionsBuilder, AgGrid
|
| 5 |
+
|
| 6 |
+
def display_app():
    """Render the Open LLM Leaderboard visualization page.

    The page loads the scores table, lets the user choose a sort column and
    filter rows by a substring of the model name, shows the result in a
    single-selection grid, and draws a radar chart for the selected model.
    When nothing is selected, the first row of the current view is charted.
    """
    score_columns = ["ARC", "HellaSwag", "TruthfulQA", "Winogrande", "GSM8K", "MMLU", "Average"]

    st.markdown("# Open LLM Leaderboard Viz")
    st.markdown("This is a visualization of the results in [open-llm-leaderboard/results](https://huggingface.co/datasets/open-llm-leaderboard/results)")
    st.markdown("To select a model, click on the checkbox beside its name.")

    dataframe = load_dataframe()

    sort_selection = st.selectbox(label="Sort by:", options=list(dataframe.columns))
    if sort_selection is None:
        sort_selection = "model_name"
    # Names are listed A-Z; every other column is listed highest value first.
    ascending = sort_selection == "model_name"

    name = st.text_input(label=":mag: Search by name")
    if name:
        # Match the query literally: user input such as "(" or "+" is not a
        # valid regular expression and would otherwise raise. ``na=False``
        # keeps the mask strictly boolean when a model name is missing.
        matches = dataframe["model_name"].str.contains(name, regex=False, na=False)
        dataframe = dataframe[matches]

    dataframe = sort_by(dataframe=dataframe, column_name=sort_selection, ascending=ascending)

    # The grid shows percentages rounded to two decimals; ``dataframe`` keeps
    # the raw values because the radar chart does its own scaling.
    dataframe_display = dataframe.copy()
    dataframe_display[score_columns] = (dataframe[score_columns].astype(float) * 100).round(2)

    # Infer basic column definitions from the dataframe dtypes.
    gb = GridOptionsBuilder.from_dataframe(dataframe_display)
    gb.configure_selection(selection_mode="single", use_checkbox=True)
    gb.configure_grid_options(domLayout='normal')
    gridOptions = gb.build()

    # The narrow middle column only acts as a spacer between grid and chart.
    column1, _spacer, column2 = st.columns([0.26, 0.05, 0.69], gap="small")

    with column1:
        grid_response = AgGrid(
            dataframe_display,
            gridOptions=gridOptions,
            height=300,
            width='40%'
        )

    # Fallback model when no grid row is selected: first row of the view.
    subdata = dataframe.head(1)
    if len(subdata) > 0:
        model_name = subdata["model_name"].values[0]
    else:
        model_name = ""

    selected_rows = grid_response['selected_rows']
    has_selection = selected_rows is not None and len(selected_rows) > 0

    with column2:
        if has_selection:
            model_name = selected_rows[0]["model_name"]
            figure = plot_radar_chart_name(dataframe=dataframe, model_name=model_name)
            # NOTE(review): this branch disables container width while the
            # fallback branch enables it; kept as found - confirm intent.
            st.plotly_chart(figure, use_container_width=False)
        elif len(subdata) > 0:
            figure = plot_radar_chart_name(dataframe=subdata, model_name=model_name)
            st.plotly_chart(figure, use_container_width=True)

        # NOTE(review): the original indentation was lost, so it is unclear
        # whether this caption belongs inside the chart column - confirm.
        st.markdown("**Model name:** %s" % model_name)
|
| 79 |
+
|
| 80 |
+
|
src/load_data.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
def load_dataframe(path: str = "public/datasets/models_scores.csv") -> pd.DataFrame:
    """
    Load the model scores table from a CSV file.

    Arguments:
        - path: location of the CSV file; defaults to the dataset shipped in
          the public directory (resolved against the current working directory)

    Returns:
        dataframe: a pd.DataFrame with the contents of the CSV file, minus the
        "Unnamed: 0" column when the file has one
    """
    dataframe = pd.read_csv(path)
    # "Unnamed: 0" is the name pandas gives to a header-less first column.
    # Tolerate its absence so a file without that column still loads.
    return dataframe.drop(columns="Unnamed: 0", errors="ignore")
|
| 13 |
+
|
| 14 |
+
def sort_by(dataframe: pd.DataFrame, column_name: str, ascending:bool = False) -> pd.DataFrame:
    """
    Return a copy of the dataframe ordered by a single column.

    Arguments:
        - dataframe: the pandas dataframe to order; it is left untouched
        - column_name: name of the column whose values define the order
        - ascending: True for smallest-first, False (the default) for largest-first

    Returns:
        a new dataframe with the rows in the requested order
    """
    ordered = dataframe.sort_values(column_name, ascending=ascending)
    return ordered
|
src/plot.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.load_data import load_dataframe
|
| 2 |
+
import plotly.graph_objects as go
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
# Hugging Face colors: fill for the radar area and stroke for its outline.
fillcolor = "#FFD21E"
line_color = "#FF9D00"

# Opacity applied to the radar trace.
opacity = 0.75

# Metrics shown on the radar chart, in axis order.
categories = [
    "ARC",
    "GSM8K",
    "TruthfulQA",
    "Winogrande",
    "HellaSwag",
    "MMLU",
]
|
| 15 |
+
|
| 16 |
+
def _build_radar_figure(scores, model_name, categories, fillcolor, line_color):
    """Build a single-trace radar chart from one row of raw scores."""
    # Cast to float BEFORE scaling: multiplying a string cell by 100 would
    # repeat the text instead of scaling the number.
    values = np.asarray(scores, dtype=float).ravel() * 100
    values = values.round(decimals=2)

    # Repeat the first point at the end so the outline closes on itself.
    values = np.append(values, values[0])
    categories_theta = [*categories, categories[0]]

    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=categories_theta,
        fill='toself',
        fillcolor=fillcolor,
        opacity=opacity,
        line=dict(color=line_color),
        name=model_name
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100.]
            )),
        showlegend=False
    )
    return fig


def plot_radar_chart_index(dataframe: pd.DataFrame, index: int, categories: list = categories, fillcolor: str = fillcolor, line_color:str = line_color):
    """
    Plot the row of the dataframe labelled ``index`` as a radar chart.

    Arguments:
        dataframe: a pandas DataFrame holding a "model_name" column and one
            column per entry of ``categories``
        index: the index label of the row to plot (looked up with ``.loc``)
        categories: the list of the metrics, one radar axis each
        fillcolor: a string specifying the color to fill the area
        line_color: a string specifying the color of the lines in the graph

    Returns:
        a plotly Figure whose radial axis spans 0 to 100
    """
    row_scores = dataframe.loc[index, categories].to_numpy()
    model_name = dataframe.loc[index, "model_name"]
    return _build_radar_figure(row_scores, model_name, categories, fillcolor, line_color)


def plot_radar_chart_name(dataframe: pd.DataFrame, model_name: str, categories: list = categories, fillcolor: str = fillcolor, line_color:str = line_color):
    """
    Plot the scores of the model named ``model_name`` as a radar chart.

    Arguments:
        dataframe: a pandas DataFrame holding a "model_name" column and one
            column per entry of ``categories``
        model_name: a string stating the name of the model
        categories: the list of the metrics, one radar axis each
        fillcolor: a string specifying the color to fill the area
        line_color: a string specifying the color of the lines in the graph

    Returns:
        a plotly Figure whose radial axis spans 0 to 100

    Raises:
        IndexError: if no row of the dataframe has this model name
    """
    matches = dataframe.loc[dataframe["model_name"] == model_name, categories]
    if matches.empty:
        raise IndexError("no row with model_name == %r" % (model_name,))

    # Chart exactly one row. Keeping the 2-D selection would give more radial
    # values than angles, and duplicate names would corrupt the closing point.
    row_scores = matches.iloc[0].to_numpy()
    return _build_radar_figure(row_scores, model_name, categories, fillcolor, line_color)
|