File size: 2,807 Bytes
cae5faa
907f20d
 
a241108
907f20d
f3dad8e
 
 
 
d770b4b
f3dad8e
d770b4b
3f2bf0c
 
a241108
907f20d
a241108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907f20d
 
a241108
907f20d
a241108
 
 
 
 
 
907f20d
a241108
907f20d
a241108
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os

# Redirect every Hugging Face cache to /data/huggingface.
# This MUST happen before `datasets` (and, through it, `huggingface_hub`) is
# imported: those libraries read these variables into module-level constants
# at import time, so assigning them afterwards leaves the default cache
# location in use.
# NOTE(review): /data is presumably the deployment's persistent volume — confirm.
os.environ["HF_HOME"] = "/data/huggingface"
os.environ["HF_DATASETS_CACHE"] = "/data/huggingface/datasets"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/data/huggingface/hub"
os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface/transformers"

# Make sure the cache root exists before any library tries to write into it.
os.makedirs("/data/huggingface", exist_ok=True)

# Imported late on purpose (see the note above), hence the E402 suppressions.
import pandas as pd  # noqa: E402
import streamlit as st  # noqa: E402
from datasets import load_dataset  # noqa: E402



# Create the three top-level tabs; each `with tabN:` section renders into one.
tab1, tab2, tab3 = st.tabs(["Benchmark", "Data viz", "Data explorer"])

with tab1:
    st.title("Professor Layton LLM Benchmark")

    # All figures below are hard-coded literals, not computed results.
    # NOTE(review): the values look like placeholders (identical across every
    # hint level, and the hint columns skip "1-hint") — confirm before
    # publishing.
    providers = ["Open AI", "Open AI", "Open AI", "Mistral"]
    release_dates = ["2025-04-16", "2025-04-16", "2025-04-16", "2025-04-16"]

    # Only the first table lists "Medium 3"; the other three list
    # "Pixtral Large" as the Mistral entry.
    first_table_models = ["o3", "o4-mini", "GPT-4o", "Medium 3"]
    other_table_models = ["o3", "o4-mini", "GPT-4o", "Pixtral Large"]

    # Metric columns of the two "plain" leaderboards.
    summary_metrics = {
        "accuracy": [0.85, 0.90, 0.88, 0.56],
        "pika": [30000, 34000, 23000, 45000],
        "cost": [0.10, 0.15, 0.08, 0.3],
        "speed": [120, 100, 150, 455],
    }

    # Metric columns of the two "with hints" leaderboards.
    hint_metrics = {
        f"{hint_count}-hint accuracy": [0.85, 0.90, 0.88, 0.56]
        for hint_count in (0, 2, 3, 4)
    }

    # (caption, columns) pairs in display order. Dict insertion order fixes
    # the column order: provider, models, release, then the metrics.
    leaderboards = [
        (
            "Only text questions",
            {
                "provider": providers,
                "models": first_table_models,
                "release": release_dates,
                **summary_metrics,
            },
        ),
        (
            "Only text questions with hints",
            {
                "provider": providers,
                "models": other_table_models,
                "release": release_dates,
                **hint_metrics,
            },
        ),
        (
            "Text and image questions",
            {
                "provider": providers,
                "models": other_table_models,
                "release": release_dates,
                **summary_metrics,
            },
        ),
        (
            "Text and image questions with hints",
            {
                "provider": providers,
                "models": other_table_models,
                "release": release_dates,
                **hint_metrics,
            },
        ),
    ]

    # Render each leaderboard as a caption followed by its table.
    for caption_text, columns in leaderboards:
        st.caption(caption_text)
        st.dataframe(pd.DataFrame(columns))


with tab2:

    # Called without `split=`, `load_dataset` returns a DatasetDict (a mapping
    # of split name -> Dataset). A DatasetDict has no `to_pandas()`, so each
    # split is converted on its own and the frames are stacked into one table.
    dataset = load_dataset("cmenasse/layton")
    if hasattr(dataset, "to_pandas"):
        # Already a single Dataset: convert it directly.
        df = dataset.to_pandas()
    else:
        df = pd.concat(
            [split.to_pandas() for split in dataset.values()],
            ignore_index=True,
        )

    # Show the table
    st.dataframe(df)
       

with tab3:

    # The third tab (labelled "Data explorer" where the tabs are created)
    # currently renders only this static placeholder text.
    st.markdown("This is an example of a third tab.")