foo-barrr committed on
Commit
cf68414
·
1 Parent(s): 23b13b0

Simple Gradio app to display models and scores from a Google sheet

Browse files
Files changed (2) hide show
  1. app.py +73 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import requests
4
+ from io import StringIO
5
+
6
+ # Load data from Google Sheets
7
def load_data_from_google_sheets(
    sheet_id: str = "19HEHUtljTu1jaScgOvRup8mHvh_2gA3ctMtqdNjOutI",
    timeout: float = 10,
):
    """Download a Google Sheet as CSV and return its rows and column names.

    Args:
        sheet_id: Identifier of the spreadsheet to export. Defaults to the
            leaderboard sheet this app displays.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the app forever.

    Returns:
        A ``(data, headers)`` pair where ``data`` is a list of row lists and
        ``headers`` is the list of column names. If the sheet cannot be
        fetched or parsed, the error is printed and ``([], [])`` is returned
        so that callers can always unpack the result.
    """
    # CSV export URL format for Google Sheets
    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Read CSV data into pandas DataFrame
        df = pd.read_csv(StringIO(response.text))
    except Exception as e:
        # Best-effort loader: report the problem and hand back an empty table
        # instead of implicitly returning None, which callers cannot unpack.
        print(f"Error loading Google Sheets data: {e}")
        return [], []

    # Convert DataFrame to list of lists for Gradio
    return df.values.tolist(), df.columns.tolist()
28
+
29
# Load the data from Google Sheets.
# A failed load must not prevent the app from starting, so a missing result
# falls back to an empty table.
leaderboard_data, headers = load_data_from_google_sheets() or ([], [])

# Create the Gradio interface
with gr.Blocks(title="LLM Propensity Evaluation Leaderboard") as demo:
    gr.Markdown("# 🛡️ LLM Propensity Evaluation Leaderboard")
    gr.Markdown("Measuring propensities / alignment traits of the most downloaded models on HuggingFace")

    # Add methodology or description
    with gr.Accordion("📋 Evaluation Methodology", open=False):
        gr.Markdown("""
        **Evaluation Details:**
        - **Instruction Following Score**: Measures a model's tendency to follow instructions accurately. Measured using the IFEval dataset.
        - **Hallucination Rate**: Evaluates how often a model hallucinates. Measured using a subset of the SimpleQA dataset. We calculated the rate using this formula : (1 - (correct + not_attempted)), where correct = when the model answered a question correctly and not_attempted = when a model admits to not knowing the answer to a question.*
        """)

    # Add refresh functionality
    def refresh_data():
        """Reload the sheet and return an updated leaderboard table.

        Raises:
            gr.Error: If the sheet could not be loaded, so the user sees a
                message and the currently displayed table is left untouched.
        """
        result = load_data_from_google_sheets()
        # Covers both a loader that returned nothing and one that returned
        # an empty table with no column names.
        if not result or not result[1]:
            raise gr.Error("Could not refresh the leaderboard data. Please try again later.")
        data, cols = result
        return gr.Dataframe(value=data, headers=cols)

    refresh_btn = gr.Button("🔄 Refresh Data")

    # Create the leaderboard
    leaderboard = gr.Dataframe(
        value=leaderboard_data,
        # When nothing was loaded, pass None instead of an empty header list.
        headers=headers or None,
        datatype=["str", "number", "number"],
        interactive=False,
        wrap=True,
    )

    # Connect refresh button
    refresh_btn.click(refresh_data, outputs=leaderboard)

    # Add footer information
    # NOTE(review): "<TBD>" may be treated as an HTML tag by the Markdown
    # renderer and not be displayed — confirm and replace with real contact info.
    gr.Markdown("""
    ---
    **Last Updated**: Sep 11, 2025
    **Contact**: <TBD>
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ pandas
3
+ requests