Spaces:
Sleeping
Sleeping
File size: 2,807 Bytes
import os

# Point every Hugging Face cache at /data/huggingface.
# This has to happen BEFORE `datasets` is imported: the Hugging Face
# libraries read these variables when they are first imported, so assigning
# them after the import leaves the default cache locations in effect.
os.environ["HF_HOME"] = "/data/huggingface"
os.environ["HF_DATASETS_CACHE"] = "/data/huggingface/datasets"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/data/huggingface/hub"
os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface/transformers"
# Create the cache root up front; exist_ok keeps Streamlit reruns harmless.
os.makedirs("/data/huggingface", exist_ok=True)

# Third-party imports deliberately follow the environment setup above.
import pandas as pd  # noqa: E402
import streamlit as st  # noqa: E402
from datasets import load_dataset  # noqa: E402
# Three top-level tabs; each one is populated by its own `with` block.
tab1, tab2, tab3 = st.tabs(["Benchmark", "Data viz", "Data explorer"])

with tab1:
    st.title("Professor Layton LLM Benchmark")

    # Columns shared by every benchmark table.
    # NOTE(review): the same figures are repeated in all four tables, which
    # looks like placeholder data — confirm before publishing.
    vendors = ["Open AI", "Open AI", "Open AI", "Mistral"]
    released = ["2025-04-16", "2025-04-16", "2025-04-16", "2025-04-16"]
    scores = [0.85, 0.90, 0.88, 0.56]

    # NOTE(review): the plain text-only table lists "Medium 3" while the
    # text-only table with hints lists "Pixtral Large" — confirm which model
    # is intended.
    medium_lineup = ["o3", "o4-mini", "GPT-4o", "Medium 3"]
    pixtral_lineup = ["o3", "o4-mini", "GPT-4o", "Pixtral Large"]

    def overall_columns(model_names):
        """Build the columns of an overall-results table."""
        return {
            "provider": vendors,
            "models": model_names,
            "release": released,
            "accuracy": scores,
            "pika": [30000, 34000, 23000, 45000],
            "cost": [0.10, 0.15, 0.08, 0.3],
            "speed": [120, 100, 150, 455],
        }

    def hint_columns(model_names):
        """Build the columns of an accuracy-by-hint-count table."""
        # NOTE(review): the labels go 0, 2, 3, 4 with no "1-hint" column —
        # confirm this is intended.
        return {
            "provider": vendors,
            "models": model_names,
            "release": released,
            "0-hint accuracy": scores,
            "2-hint accuracy": scores,
            "3-hint accuracy": scores,
            "4-hint accuracy": scores,
        }

    # (caption, columns) pairs, rendered top to bottom in this order.
    sections = [
        ("Only text questions", overall_columns(medium_lineup)),
        ("Only text questions with hints", hint_columns(pixtral_lineup)),
        ("Text and image questions", overall_columns(pixtral_lineup)),
        ("Text and image questions with hints", hint_columns(pixtral_lineup)),
    ]
    for heading, columns in sections:
        st.caption(heading)
        st.dataframe(pd.DataFrame(columns))
with tab2:
    # Without a `split` argument, load_dataset returns a DatasetDict (a dict
    # of split name -> Dataset). A DatasetDict has no to_pandas() method, so
    # each split is converted on its own.
    loaded = load_dataset("cmenasse/layton")
    if isinstance(loaded, dict):
        frames = {name: part.to_pandas() for name, part in loaded.items()}
        if len(frames) == 1:
            # Single split: show it as-is.
            df = next(iter(frames.values()))
        else:
            # Several splits: stack them, keeping the split name as the
            # outer index level so rows stay attributable.
            df = pd.concat(frames, names=["split", "row"])
    else:
        # Already a single Dataset.
        df = loaded.to_pandas()
    # Show the table
    st.dataframe(df)

with tab3:
    # Placeholder content for the third tab.
    st.markdown("This is an example of a third tab.")