MaziyarPanahi committed on
Commit
b145c4b
·
1 Parent(s): 254bcd7

first final design

Browse files
Files changed (1) hide show
  1. app.py +76 -53
app.py CHANGED
@@ -52,6 +52,7 @@ def extract_size(model_name):
52
  return 0
53
 
54
  df['Size'] = df['Model Name'].apply(extract_size)
 
55
 
56
  # Add size category for filtering
57
  def get_size_category(size):
@@ -87,13 +88,17 @@ def filter_and_search_models(search_query, size_ranges, sort_by):
87
  if sort_by in filtered_df.columns:
88
  filtered_df = filtered_df.sort_values(sort_by, ascending=False)
89
 
90
- # Select only the columns to display
91
- display_df = filtered_df[['Model Name', 'Separate Grounding Score',
92
  'Separate Quality Score', 'Combined Score']]
93
 
 
 
 
94
  # Round numerical values for better display
95
  for col in ['Separate Grounding Score', 'Separate Quality Score', 'Combined Score']:
96
- display_df.loc[:, col] = display_df[col].round(6)
 
97
 
98
  return display_df
99
 
@@ -102,61 +107,60 @@ with gr.Blocks(title="FACT Leaderboard", theme=gr.themes.Base()) as app:
102
  gr.Markdown("# 🏆 FACT Leaderboard")
103
  gr.Markdown("### Benchmark for evaluating factuality in language models")
104
 
 
105
  with gr.Row():
106
- with gr.Column(scale=1):
107
- # Search box
108
  search_box = gr.Textbox(
109
  label="Model Search",
110
- placeholder="Search for a model...",
111
  value=""
112
  )
113
-
114
- # Size range filter
115
- gr.Markdown("**Filter by Model Size**")
116
- size_checkboxes = gr.CheckboxGroup(
117
- choices=["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
118
- value=["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
119
- label="",
120
- elem_classes="size-filter"
121
- )
122
-
123
- # Sort by dropdown
124
- gr.Markdown("**Sort by Metric**")
125
  sort_dropdown = gr.Dropdown(
126
  choices=["Combined Score", "Separate Grounding Score", "Separate Quality Score"],
127
  value="Combined Score",
128
- label="",
129
  elem_classes="sort-dropdown"
130
  )
131
-
132
- # Add legend/explanation
133
- gr.Markdown("---")
134
- gr.Markdown("**Metric Explanations:**")
135
- gr.Markdown("""
136
- - **Grounding Score**: Measures factual accuracy
137
- - **Quality Score**: Measures response quality
138
- - **Combined Score**: Overall performance metric
139
- """)
140
-
141
- with gr.Column(scale=3):
142
- # Results table
143
- results_table = gr.Dataframe(
144
- value=filter_and_search_models("", ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"], "Combined Score"),
145
- headers=["Model Name", "Separate Grounding Score",
146
- "Separate Quality Score", "Combined Score"],
147
- datatype=["str", "number", "number", "number"],
148
- elem_id="leaderboard-table",
149
- interactive=False,
150
- wrap=True
151
- )
152
-
153
- # Add statistics
154
- total_models = gr.Markdown(f"**Total Models: {len(df)}**")
 
 
 
 
 
 
 
 
 
155
 
156
  # Update table when filters change
157
  def update_table(search, sizes, sort_by):
158
  filtered_df = filter_and_search_models(search, sizes, sort_by)
159
- model_count = f"**Total Models: {len(filtered_df)}**"
160
  return filtered_df, model_count
161
 
162
  # Connect all inputs to the update function
@@ -182,28 +186,41 @@ with gr.Blocks(title="FACT Leaderboard", theme=gr.themes.Base()) as app:
182
  app.css = """
183
  #leaderboard-table {
184
  font-size: 14px;
 
 
 
185
  }
186
 
187
  #leaderboard-table td:first-child {
188
  font-weight: 500;
 
189
  }
190
 
191
- #leaderboard-table td:not(:first-child) {
 
 
 
 
 
 
192
  text-align: center;
193
  }
194
 
 
 
 
 
 
 
 
195
  .size-filter label {
196
  display: flex;
197
  align-items: center;
198
- margin: 5px 0;
199
  }
200
 
201
  .size-filter input[type="checkbox"] {
202
- margin-right: 8px;
203
- }
204
-
205
- .sort-dropdown {
206
- margin-top: 10px;
207
  }
208
 
209
  /* Highlight rows based on model family */
@@ -216,12 +233,18 @@ with gr.Blocks(title="FACT Leaderboard", theme=gr.themes.Base()) as app:
216
  }
217
 
218
  #leaderboard-table tr:has(td:contains("Qwen")) {
219
- background-color: #f0fff0;
220
  }
221
 
222
  #leaderboard-table tr:has(td:contains("google")) {
223
  background-color: #fff0f5;
224
  }
 
 
 
 
 
 
225
  """
226
 
227
  # To load from CSV file, replace the sample data with:
@@ -230,4 +253,4 @@ with gr.Blocks(title="FACT Leaderboard", theme=gr.themes.Base()) as app:
230
 
231
  # Launch the app
232
  if __name__ == "__main__":
233
- app.launch(share=True)
 
52
  return 0
53
 
54
  df['Size'] = df['Model Name'].apply(extract_size)
55
+ df['Size_Display'] = df['Size'].apply(lambda x: f"{x}B" if x > 0 else "Unknown")
56
 
57
  # Add size category for filtering
58
  def get_size_category(size):
 
88
  if sort_by in filtered_df.columns:
89
  filtered_df = filtered_df.sort_values(sort_by, ascending=False)
90
 
91
+ # Select columns to display (including Size)
92
+ display_df = filtered_df[['Model Name', 'Size_Display', 'Separate Grounding Score',
93
  'Separate Quality Score', 'Combined Score']]
94
 
95
+ # Rename Size_Display to Size for cleaner display
96
+ display_df = display_df.rename(columns={'Size_Display': 'Size'})
97
+
98
  # Round numerical values for better display
99
  for col in ['Separate Grounding Score', 'Separate Quality Score', 'Combined Score']:
100
+ display_df = display_df.copy() # Create a copy to avoid SettingWithCopyWarning
101
+ display_df[col] = display_df[col].round(6)
102
 
103
  return display_df
104
 
 
107
  gr.Markdown("# 🏆 FACT Leaderboard")
108
  gr.Markdown("### Benchmark for evaluating factuality in language models")
109
 
110
+ # Filters at the top
111
  with gr.Row():
112
+ with gr.Column(scale=2):
 
113
  search_box = gr.Textbox(
114
  label="Model Search",
115
+ placeholder="Search for a model name...",
116
  value=""
117
  )
118
+
119
+ with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
120
  sort_dropdown = gr.Dropdown(
121
  choices=["Combined Score", "Separate Grounding Score", "Separate Quality Score"],
122
  value="Combined Score",
123
+ label="Sort by",
124
  elem_classes="sort-dropdown"
125
  )
126
+
127
+ # Size filters in a row
128
+ with gr.Row():
129
+ gr.Markdown("**Filter by Model Size:**")
130
+ size_checkboxes = gr.CheckboxGroup(
131
+ choices=["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
132
+ value=["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
133
+ label="",
134
+ elem_classes="size-filter",
135
+ container=False
136
+ )
137
+
138
+ # Model count
139
+ total_models = gr.Markdown(f"**Showing {len(df)} models**")
140
+
141
+ # Results table below filters
142
+ results_table = gr.Dataframe(
143
+ value=filter_and_search_models("", ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"], "Combined Score"),
144
+ headers=["Model Name", "Size", "Separate Grounding Score",
145
+ "Separate Quality Score", "Combined Score"],
146
+ datatype=["str", "str", "number", "number", "number"],
147
+ elem_id="leaderboard-table",
148
+ interactive=False,
149
+ wrap=True
150
+ )
151
+
152
+ # Metric explanations at the bottom
153
+ with gr.Accordion("Metric Explanations", open=False):
154
+ gr.Markdown("""
155
+ - **Grounding Score**: Measures the model's ability to provide factually accurate responses based on given context
156
+ - **Quality Score**: Evaluates the overall quality of the model's responses including coherence and relevance
157
+ - **Combined Score**: A weighted combination of grounding and quality scores representing overall performance
158
+ """)
159
 
160
  # Update table when filters change
161
  def update_table(search, sizes, sort_by):
162
  filtered_df = filter_and_search_models(search, sizes, sort_by)
163
+ model_count = f"**Showing {len(filtered_df)} models**"
164
  return filtered_df, model_count
165
 
166
  # Connect all inputs to the update function
 
186
  app.css = """
187
  #leaderboard-table {
188
  font-size: 14px;
189
+ margin-top: 20px;
190
+ max-height: 600px;
191
+ overflow-y: auto;
192
  }
193
 
194
  #leaderboard-table td:first-child {
195
  font-weight: 500;
196
+ max-width: 400px;
197
  }
198
 
199
+ #leaderboard-table td:nth-child(2) {
200
+ text-align: center;
201
+ font-weight: 500;
202
+ color: #666;
203
+ }
204
+
205
+ #leaderboard-table td:nth-child(n+3) {
206
  text-align: center;
207
  }
208
 
209
+ .size-filter {
210
+ display: flex;
211
+ flex-wrap: wrap;
212
+ gap: 15px;
213
+ margin-top: 10px;
214
+ }
215
+
216
  .size-filter label {
217
  display: flex;
218
  align-items: center;
219
+ margin: 0;
220
  }
221
 
222
  .size-filter input[type="checkbox"] {
223
+ margin-right: 5px;
 
 
 
 
224
  }
225
 
226
  /* Highlight rows based on model family */
 
233
  }
234
 
235
  #leaderboard-table tr:has(td:contains("Qwen")) {
236
+ background-color: #f5fff5;
237
  }
238
 
239
  #leaderboard-table tr:has(td:contains("google")) {
240
  background-color: #fff0f5;
241
  }
242
+
243
+ /* Header styling */
244
+ #leaderboard-table th {
245
+ background-color: #f8f9fa;
246
+ font-weight: 600;
247
+ }
248
  """
249
 
250
  # To load from CSV file, replace the sample data with:
 
253
 
254
  # Launch the app
255
  if __name__ == "__main__":
256
+ app.launch()