Listever committed on
Commit
7573164
·
verified ·
1 Parent(s): 2c3173d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +429 -177
app.py CHANGED
@@ -1,204 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  interactive=False,
89
- )
 
90
 
 
91
 
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
 
 
 
 
 
 
97
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
 
 
 
 
103
 
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
  with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
 
147
  with gr.Row():
148
  with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
 
 
 
 
151
  model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
 
 
 
 
 
154
  multiselect=False,
155
- value=None,
156
  interactive=True,
157
  )
 
 
158
 
159
  with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
  multiselect=False,
164
- value="float16",
165
  interactive=True,
166
  )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
 
 
 
173
  )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
- )
 
 
 
 
 
 
 
 
 
190
 
191
  with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
200
 
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
+ # import gradio as gr
2
+ # from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
+ # import pandas as pd
4
+ # from apscheduler.schedulers.background import BackgroundScheduler
5
+ # from huggingface_hub import snapshot_download
6
+
7
+ # from src.about import (
8
+ # CITATION_BUTTON_LABEL,
9
+ # CITATION_BUTTON_TEXT,
10
+ # EVALUATION_QUEUE_TEXT,
11
+ # INTRODUCTION_TEXT,
12
+ # LLM_BENCHMARKS_TEXT,
13
+ # TITLE,
14
+ # )
15
+ # from src.display.css_html_js import custom_css
16
+ # from src.display.utils import (
17
+ # BENCHMARK_COLS,
18
+ # COLS,
19
+ # EVAL_COLS,
20
+ # EVAL_TYPES,
21
+ # AutoEvalColumn,
22
+ # ModelType,
23
+ # fields,
24
+ # WeightType,
25
+ # Precision
26
+ # )
27
+ # from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
+ # from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
+ # from src.submission.submit import add_new_eval
30
+
31
+
32
+ # def restart_space():
33
+ # API.restart_space(repo_id=REPO_ID)
34
+
35
+ # ### Space initialisation
36
+ # try:
37
+ # print(EVAL_REQUESTS_PATH)
38
+ # snapshot_download(
39
+ # repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
+ # )
41
+ # except Exception:
42
+ # restart_space()
43
+ # try:
44
+ # print(EVAL_RESULTS_PATH)
45
+ # snapshot_download(
46
+ # repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
+ # )
48
+ # except Exception:
49
+ # restart_space()
50
+
51
+
52
+ # LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
+
54
+ # (
55
+ # finished_eval_queue_df,
56
+ # running_eval_queue_df,
57
+ # pending_eval_queue_df,
58
+ # ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
+
60
+ # def init_leaderboard(dataframe):
61
+ # if dataframe is None or dataframe.empty:
62
+ # raise ValueError("Leaderboard DataFrame is empty or None.")
63
+ # return Leaderboard(
64
+ # value=dataframe,
65
+ # datatype=[c.type for c in fields(AutoEvalColumn)],
66
+ # select_columns=SelectColumns(
67
+ # default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
+ # cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
+ # label="Select Columns to Display:",
70
+ # ),
71
+ # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
+ # hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
+ # filter_columns=[
74
+ # ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
+ # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
+ # ColumnFilter(
77
+ # AutoEvalColumn.params.name,
78
+ # type="slider",
79
+ # min=0.01,
80
+ # max=150,
81
+ # label="Select the number of parameters (B)",
82
+ # ),
83
+ # ColumnFilter(
84
+ # AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
+ # ),
86
+ # ],
87
+ # bool_checkboxgroup_label="Hide models",
88
+ # interactive=False,
89
+ # )
90
+
91
+
92
+ # demo = gr.Blocks(css=custom_css)
93
+ # with demo:
94
+ # gr.HTML(TITLE)
95
+ # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
+
97
+ # with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
+ # with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
+ # leaderboard = init_leaderboard(LEADERBOARD_DF)
100
+
101
+ # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
+ # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
+
104
+ # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
+ # with gr.Column():
106
+ # with gr.Row():
107
+ # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
+
109
+ # with gr.Column():
110
+ # with gr.Accordion(
111
+ # f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
+ # open=False,
113
+ # ):
114
+ # with gr.Row():
115
+ # finished_eval_table = gr.components.Dataframe(
116
+ # value=finished_eval_queue_df,
117
+ # headers=EVAL_COLS,
118
+ # datatype=EVAL_TYPES,
119
+ # row_count=5,
120
+ # )
121
+ # with gr.Accordion(
122
+ # f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
+ # open=False,
124
+ # ):
125
+ # with gr.Row():
126
+ # running_eval_table = gr.components.Dataframe(
127
+ # value=running_eval_queue_df,
128
+ # headers=EVAL_COLS,
129
+ # datatype=EVAL_TYPES,
130
+ # row_count=5,
131
+ # )
132
+
133
+ # with gr.Accordion(
134
+ # f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
+ # open=False,
136
+ # ):
137
+ # with gr.Row():
138
+ # pending_eval_table = gr.components.Dataframe(
139
+ # value=pending_eval_queue_df,
140
+ # headers=EVAL_COLS,
141
+ # datatype=EVAL_TYPES,
142
+ # row_count=5,
143
+ # )
144
+ # with gr.Row():
145
+ # gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
+
147
+ # with gr.Row():
148
+ # with gr.Column():
149
+ # model_name_textbox = gr.Textbox(label="Model name")
150
+ # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
+ # model_type = gr.Dropdown(
152
+ # choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
+ # label="Model type",
154
+ # multiselect=False,
155
+ # value=None,
156
+ # interactive=True,
157
+ # )
158
+
159
+ # with gr.Column():
160
+ # precision = gr.Dropdown(
161
+ # choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
+ # label="Precision",
163
+ # multiselect=False,
164
+ # value="float16",
165
+ # interactive=True,
166
+ # )
167
+ # weight_type = gr.Dropdown(
168
+ # choices=[i.value.name for i in WeightType],
169
+ # label="Weights type",
170
+ # multiselect=False,
171
+ # value="Original",
172
+ # interactive=True,
173
+ # )
174
+ # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
+
176
+ # submit_button = gr.Button("Submit Eval")
177
+ # submission_result = gr.Markdown()
178
+ # submit_button.click(
179
+ # add_new_eval,
180
+ # [
181
+ # model_name_textbox,
182
+ # base_model_name_textbox,
183
+ # revision_name_textbox,
184
+ # precision,
185
+ # weight_type,
186
+ # model_type,
187
+ # ],
188
+ # submission_result,
189
+ # )
190
+
191
+ # with gr.Row():
192
+ # with gr.Accordion("📙 Citation", open=False):
193
+ # citation_button = gr.Textbox(
194
+ # value=CITATION_BUTTON_TEXT,
195
+ # label=CITATION_BUTTON_LABEL,
196
+ # lines=20,
197
+ # elem_id="citation-button",
198
+ # show_copy_button=True,
199
+ # )
200
+
201
+ # scheduler = BackgroundScheduler()
202
+ # scheduler.add_job(restart_space, "interval", seconds=1800)
203
+ # scheduler.start()
204
+ # demo.queue(default_concurrency_limit=40).launch()
205
+ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
206
+ import os
207
+
208
  import gradio as gr
 
209
  import pandas as pd
210
+ import json
211
+ import tempfile
212
+
213
+ from constants import *
214
+ from huggingface_hub import Repository
215
# Hub access token read from the environment; ``None`` when HF_TOKEN is unset.
# It is passed as ``use_auth_token`` wherever the submission repo is opened.
# (The former module-level ``global data_component, filter_component`` statement
# was dropped: ``global`` has no effect at module scope.)
HF_TOKEN = os.environ.get("HF_TOKEN")
218
+
219
def download_csv():
    """Pull the submission repo and hand the leaderboard CSV to a file widget.

    Returns:
        A 2-tuple ``(CSV_DIR, gr.update(visible=True))``: the CSV path followed
        by an update that makes the receiving component visible.
    """
    # Sync the local clone first so the returned file reflects the latest push.
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    reveal_widget = gr.update(visible=True)
    return CSV_DIR, reveal_widget
224
+
225
def upload_file(files):
    """Collect the ``name`` attribute of every uploaded file object.

    Args:
        files: Iterable of objects exposing a ``name`` attribute (presumably the
            temp-file wrappers Gradio passes to upload callbacks; verify against
            the caller), or ``None`` when nothing was uploaded.

    Returns:
        A list with one ``name`` per input item, in input order. Empty when
        ``files`` is ``None`` or empty.
    """
    # A cleared/absent upload arrives as None; iterating it would raise TypeError.
    if files is None:
        return []
    return [uploaded.name for uploaded in files]
228
+
229
def _plain_model_name(cell):
    """Return the display name stored in a ``Model`` cell.

    Cells are written either as a markdown link ``[name](url)`` or as a bare
    ``name``; only the link form has a leading ``[`` to strip.
    """
    text = str(cell)
    if text.startswith('['):
        return text.split(']')[0][1:]
    return text


def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_type: str,
    model_link: str,
    model_size: str,
    LLM_type: str,
    LLM_name_textbox: str,
):
    """Write one submitted evaluation into the leaderboard CSV and push it.

    Args:
        input_file: JSON payload (``str`` or ``bytes``) whose top level is an
            object keyed by task name, or ``None`` when no file was uploaded.
        model_name_textbox: Name used for a new entry when no revision is given.
        revision_name_textbox: When non-empty, name of an existing entry to
            overwrite; if no row carries that name a new row is appended.
        model_type: Value stored in the first column of the row.
        model_link: Optional URL; when non-empty the model name is stored as a
            markdown link ``[name](url)``.
        model_size: Accepted for interface compatibility; currently not stored.
        LLM_type: Selected LLM name, or ``'Other'`` to use ``LLM_name_textbox``.
        LLM_name_textbox: Free-text LLM name used when ``LLM_type == 'Other'``.

    Returns:
        An ``"Error! ..."`` string when the upload is missing or is not a JSON
        object; otherwise ``0`` after the CSV has been rewritten and pushed.
    """
    if input_file is None:
        return "Error! Empty file!"

    # Validate the payload before touching the repo so a bad upload cannot
    # leave a half-finished pull/write behind.
    try:
        upload_data = json.loads(input_file)
    except (TypeError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return "Error! The uploaded file is not valid JSON!"
    if not isinstance(upload_data, dict):
        return "Error! The uploaded JSON must be an object mapping task names to scores!"

    submission_repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    submission_repo.git_pull()
    csv_data = pd.read_csv(CSV_DIR)

    LLM_name = LLM_name_textbox if LLM_type == 'Other' else LLM_type

    # Default target is a brand-new row appended after the existing ones.
    row = csv_data.shape[0]
    if revision_name_textbox == '':
        model_name = model_name_textbox
    else:
        model_name = revision_name_textbox
        # Compare against the bare names so rows saved without a link match too.
        name_list = [_plain_model_name(name) for name in csv_data['Model']]
        if revision_name_textbox in name_list:
            row = name_list.index(revision_name_textbox)

    if model_link != '':
        model_name = '[' + model_name + '](' + model_link + ')'

    # Row layout: model type, model name, LLM name, then one score per task
    # in TASK_INFO order (0 for tasks missing from the upload).
    new_data = [model_type, model_name, LLM_name]
    new_data.extend(upload_data.get(key, 0) for key in TASK_INFO)

    csv_data.loc[row] = new_data
    csv_data.to_csv(CSV_DIR, index=False)
    submission_repo.push_to_hub()
    return 0
284
+
285
def get_baseline_df():
    """Load the leaderboard restricted to the default column selection.

    Pulls the submission repo, reads the CSV, orders rows by ``Avg`` descending
    and keeps ``MODEL_INFO`` plus the columns in ``checkbox_group.value``.

    Returns:
        The sorted, column-filtered DataFrame.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    ranked = pd.read_csv(CSV_DIR).sort_values(by="Avg", ascending=False)
    # NOTE(review): checkbox_group.value is presumably the component's configured
    # value rather than a per-session selection; confirm against Gradio.
    shown_columns = MODEL_INFO + checkbox_group.value
    return ranked[shown_columns]
293
+
294
def get_all_df():
    """Pull the submission repo and return the whole leaderboard, best ``Avg`` first.

    Returns:
        DataFrame read from ``CSV_DIR`` with every column, sorted by the
        ``Avg`` column in descending order.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()
    return pd.read_csv(CSV_DIR).sort_values(by="Avg", ascending=False)
300
+
301
def on_filter_model_size_method_change(selected_columns):
    """Rebuild the leaderboard table for the currently ticked task columns.

    Args:
        selected_columns: Task column names chosen in the checkbox group. May be
            empty (or ``None``), in which case only ``MODEL_INFO`` columns are
            shown and the ``Avg`` ordering from ``get_all_df`` is kept.

    Returns:
        A non-interactive ``gr.components.Dataframe`` holding ``MODEL_INFO``
        plus the selected task columns, sorted descending by the first selected
        task (first in ``TASK_INFO`` order).

    Raises:
        ValueError: If a shown column is missing from ``COLUMN_NAMES``.
    """
    leaderboard = get_all_df()

    # Re-derive the selection from TASK_INFO so column order is stable no
    # matter in which order the boxes were ticked.
    ticked = selected_columns or []
    task_columns = [task for task in TASK_INFO if task in ticked]
    visible_columns = MODEL_INFO + task_columns

    leaderboard = leaderboard[visible_columns]
    # With every box unticked there is no task column to sort by; indexing
    # task_columns[0] here used to raise IndexError.
    if task_columns:
        leaderboard = leaderboard.sort_values(by=task_columns[0], ascending=False)

    # Look up each visible column's datatype by its position in COLUMN_NAMES.
    column_types = [DATA_TITILE_TYPE[COLUMN_NAMES.index(name)] for name in visible_columns]

    return gr.components.Dataframe(
        value=leaderboard,
        headers=visible_columns,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
323
 
324
# Gradio UI: a leaderboard tab, an about tab and a submission tab, followed by
# refresh/download controls. Built and launched at import time.
# NOTE(review): container nesting below was reconstructed from a flattened diff
# (indentation was lost) -- confirm against the deployed layout.
block = gr.Blocks()


def _report_submission(*form_values):
    """Run ``add_new_eval`` and turn its result into text for the status area."""
    outcome = add_new_eval(*form_values)
    # add_new_eval returns a string when it rejects the upload and 0 after a
    # successful push; Markdown needs text either way.
    return outcome if isinstance(outcome, str) else "Submission received!"


with block:
    gr.Markdown(LEADERBORAD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: the leaderboard table with a column selector.
        with gr.TabItem("📊 MVBench", elem_id="mvbench-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(TABLE_INTRODUCTION)

            # Column selector: which task scores appear next to MODEL_INFO.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )

            # Passing the function (not its result) lets Gradio call it to
            # produce the initial table value.
            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITILE_TYPE,
                interactive=False,
                visible=True,
            )

            checkbox_group.change(
                fn=on_filter_model_size_method_change,
                inputs=[checkbox_group],
                outputs=data_component,
            )

        # Tab 2: static description of the benchmark.
        with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # Tab 3: submission form.
        with gr.TabItem("🚀 Submit here! ", elem_id="mvbench-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_type = gr.Dropdown(
                        choices=[
                            "LLM",
                            "ImageLLM",
                            "VideoLLM",
                            "Other",
                        ],
                        label="Model type",
                        multiselect=False,
                        value="ImageLLM",
                        interactive=True,
                    )

                with gr.Column():
                    LLM_type = gr.Dropdown(
                        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "InternLM-7B", "Other"],
                        label="LLM type",
                        multiselect=False,
                        value="LLaMA-7B",
                        interactive=True,
                    )
                    LLM_name_textbox = gr.Textbox(
                        label="LLM model (for Other)",
                        placeholder="LLaMA-13B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B(Input content format must be 'number+B' or '-')"
                    )

            with gr.Column():
                # type='binary' delivers the raw file bytes to the callback.
                input_file = gr.components.File(label="Click to Upload a json File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval")

                submission_result = gr.Markdown()
                # Input order must match add_new_eval's parameter order.
                # The result is routed to submission_result so rejections
                # such as "Error! Empty file!" are actually shown.
                submit_button.click(
                    _report_submission,
                    inputs=[
                        input_file,
                        model_name_textbox,
                        revision_name_textbox,
                        model_type,
                        model_link,
                        model_size,
                        LLM_type,
                        LLM_name_textbox,
                    ],
                    outputs=submission_result,
                )

    def refresh_data():
        """Return the default-column leaderboard (kept for compatibility; not wired to any event)."""
        return get_baseline_df()

    with gr.Row():
        data_run = gr.Button("Refresh")
        with gr.Row():
            result_download = gr.Button("Download Leaderboard")
            file_download = gr.File(label="download the csv of leaderborad.", visible=False)
        # Refresh re-runs the column filter so the current selection is kept.
        data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
        # download_csv returns (path, visibility update); both target the same
        # File component: the first sets its value, the second reveals it.
        result_download.click(download_csv, inputs=None, outputs=[file_download, file_download])


block.launch()