import abc
import sys
from types import SimpleNamespace

import gradio as gr

from gen_table import *
from meta_data import *
|
|
# Extra <head> markup passed to gr.Blocks(head=...): on viewports at least
# 1536px wide it forces .gradio-container to min-width var(--size-full).
head_style = (
    "\n"
    "<style>\n"
    "@media (min-width: 1536px)\n"
    "{\n"
    ".gradio-container {\n"
    "min-width: var(--size-full) !important;\n"
    "}\n"
    "}\n"
    "</style>\n"
)
|
|
# Benchmark whose name column is 'Agent'; every other benchmark uses 'Model'.
_AGENT_BENCHMARK = 'SWE-bench-verified'


def _entry_kind(benchmark):
    """Return 'Agent' for the agent benchmark and 'Model' for any other one.

    The result doubles as the name-column key of the benchmark table and as
    the prefix of the search-box label.
    """
    return 'Agent' if benchmark == _AGENT_BENCHMARK else 'Model'


def _search_hint(benchmark):
    """Return the hint text pre-filled in the search box of *benchmark*.

    The same text is compared against the box content in ``filter_df_l2`` to
    detect "nothing typed yet", so it must be produced in exactly one place.
    """
    return f'Input the {_entry_kind(benchmark)} Name (fuzzy, case insensitive)'


def filter_df_l2(dataset_name, fields, model_name):
    """Rebuild the table of one benchmark for the current UI selection.

    Args:
        dataset_name: Benchmark name; must be an entry of ``benchmark_list``.
        fields: Column names currently ticked in the checkbox group; they are
            appended to the benchmark's 'essential' columns.
        model_name: Content of the search box. The untouched hint text means
            "no filter"; any other text is lower-cased and matched as a
            substring of each row's lower-cased display name.

    Returns:
        A ``gr.components.DataFrame`` with the selected columns of the
        matching rows.

    Raises:
        ValueError: If ``dataset_name`` is not in ``benchmark_list``.
    """
    s = structs[benchmark_list.index(dataset_name)]
    headers = s.check_box['essential'] + fields
    # No defensive copy is needed: the selections below always build new
    # frames and s.table itself is never written to.
    df = s.table

    if model_name != _search_hint(dataset_name):
        print(model_name)
        query = model_name.lower()
        # Name cells may be HTML links; keep only the text between the last
        # '>' and the first '</a>' (plain text passes through unchanged).
        names = [
            cell.split('</a>')[0].split('>')[-1].lower()
            for cell in df[_entry_kind(dataset_name)]
        ]
        # .loc is deliberate: with an empty table the mask is [], and
        # df[[]] would select zero *columns* instead of zero rows.
        df = df.loc[[query in name for name in names]]

    return gr.components.DataFrame(
        value=df[headers],
        type='pandas',
        datatype=[s.type_map[x] for x in headers],
        interactive=False,
        wrap=True,
        visible=True)


with gr.Blocks(
    title="Frontier AI Cybersecurity Observatory",
    head=head_style,
) as demo:
    struct = load_results()
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    benchmark_list = list(results.keys())

    N_DATA = len(benchmark_list)
    DATASETS = benchmark_list

    # 'LEADERBORAD' (sic): the name is defined outside this file and must be
    # referenced with that exact spelling.
    gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA, EVAL_TIME))
    # One plain attribute bag per benchmark; filled in while its tab is built
    # and read back by filter_df_l2.
    structs = [SimpleNamespace() for _ in range(N_DATA)]

    # NOTE(review): the first character of the two tab labels below looks
    # like a mis-encoded emoji. It is kept unchanged here; restore the
    # intended glyph once it has been confirmed.
    with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
        with gr.TabItem('๐ About', elem_id='about', id=1):
            with open("about.md", 'r', encoding="utf-8") as file:
                gr.Markdown(file.read())

        # Tab id 1 is taken by 'About', so benchmark tabs start at id 2.
        for i, benchmark in enumerate(benchmark_list):
            with gr.TabItem(f'๐ {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
                if benchmark in LEADERBOARD_MD:
                    gr.Markdown(LEADERBOARD_MD[benchmark])

                s = structs[i]
                s.table, s.check_box = BUILD_L2_DF(results, benchmark)
                s.type_map = s.check_box['type_map']

                s.checkbox_group = gr.CheckboxGroup(
                    choices=s.check_box['all'],
                    value=s.check_box['required'],
                    label=f'{benchmark} CheckBoxes',
                    interactive=True,
                )
                s.headers = s.check_box['essential'] + s.checkbox_group.value

                with gr.Row():
                    s.model_name = gr.Textbox(
                        value=_search_hint(benchmark),
                        label=f'{_entry_kind(benchmark)} Name',
                        interactive=True,
                        visible=True)
                s.data_component = gr.components.DataFrame(
                    value=s.table[s.headers],
                    type='pandas',
                    datatype=[s.type_map[x] for x in s.headers],
                    interactive=False,
                    wrap=True,
                    visible=True)
                # Hidden box whose only job is to hand the benchmark name to
                # filter_df_l2 as an event input.
                s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)

                # Re-filter when the column selection changes or when a
                # search is submitted from the text box.
                filter_inputs = [s.dataset, s.checkbox_group, s.model_name]
                s.checkbox_group.change(
                    fn=filter_df_l2,
                    inputs=filter_inputs,
                    outputs=s.data_component)
                s.model_name.submit(
                    fn=filter_df_l2,
                    inputs=filter_inputs,
                    outputs=s.data_component)

    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')
|
|
if __name__ == "__main__":
    # Per Gradio's launch() documentation: server_name '0.0.0.0' makes the
    # app reachable from other machines on the network, and share=True
    # additionally requests a public share link.
    demo.launch(
        share=True,
        server_name='0.0.0.0',
    )
|
|