Spaces:

yourbench
/

advanced

Running on CPU Upgrade

App Files Files Community

alozowski HF Staff committed on Apr 1

Commit

54fa655

1 Parent(s): 8002416

Add documentation

Browse files

Files changed (1) hide show

yourbench_space/app.py +124 -116

yourbench_space/app.py CHANGED Viewed

@@ -30,8 +30,7 @@ project_description = """
 **Dynamic Benchmark Generation for Language Models**
 Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
-- 📖 [FAQ](#)
-- 💻 [GitHub](https://github.com/huggingface/yourbench)
 """
 logger.remove()
@@ -249,138 +248,147 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
     gr.Markdown(project_description)
     with gr.Tabs() as tabs:
-        with gr.Tab("Setup", id=0):
-            with gr.Row():
-                with gr.Accordion("Hugging Face Settings"):
-                    login_btn = gr.LoginButton()
-                    hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
-                    app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
-                    hf_dataset_name = gr.Textbox(
-                        label="Dataset name",
-                        value="yourbench",
-                        info="Name of your new evaluation dataset",
-                    )
-                with gr.Accordion("Upload Files"):
-                    file_input = gr.File(
-                        label="Upload text files",
-                        file_count="multiple",
-                        file_types=[".txt", ".md", ".html", ".pdf"],
-                    )
-                    output = gr.Textbox(label="Log")
-                    file_input.upload(
-                        save_files,
-                        inputs=[session_state, file_input],
-                        outputs=output,
-                    )
-                    delete_button = gr.Button("Delete Uploaded Files", visible=False)
-            preview_button = gr.Button("Generate New Config", interactive=False)
-            log_message = gr.Textbox(label="Log Message", visible=True)
-            download_button = gr.File(label="Download Config", visible=False, interactive=False)
-            file_input.change(
-                lambda files: gr.update(visible=bool(files)),
-                inputs=file_input,
-                outputs=delete_button,
-            )
-            file_input.change(enable_button, inputs=file_input, outputs=preview_button)
-            def clean_and_confirm(uid):
-                MANAGERS.clean_workdir(uid)
-                return (
-                    "🗑️ All uploaded files have been deleted!",
-                    gr.update(value=None),
-                    gr.update(interactive=False),
                 )
-            delete_button.click(
-                clean_and_confirm,
-                inputs=session_state,
-                outputs=[output, file_input, preview_button],
-            )
-            preview_button.click(
-                generate_and_return,
-                inputs=[hf_org_dropdown, hf_dataset_name, session_state],
-                outputs=[log_message, download_button],
-            )
-            preview_button.click(
-                switch_to_run_generation_tab,
-                inputs=None,
-                outputs=tabs,
-            )
-        with gr.Tab("Run Generation", id=1):
-            with gr.Row():
-                start_button = gr.Button("Start Task")
-                stop_button = gr.Button("Stop Task")
-                kill_button = gr.Button("Kill Task")
-            start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
-            stop_button.click(MANAGERS.stop_process, inputs=session_state)
-            kill_button.click(MANAGERS.kill_process, inputs=session_state)
-            process_status = gr.Checkbox(label="Process Status", interactive=False)
-            status_timer = gr.Timer(2.0, active=True)
-            status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
-            with gr.Row():
-                with gr.Accordion("Stages", open=True):
-                    stages_table = gr.CheckboxGroup(
-                        choices=map_stage_names(STAGES),
-                        value=[],
-                        label="Pipeline Stages Completed",
-                        container=False,
-                        interactive=False,
-                    )
-            with gr.Row():
-                with gr.Column():
-                    with gr.Accordion("Log Output", open=True):
-                        log_output = gr.Code(language=None, lines=20, interactive=False)
-                with gr.Column():
-                    with gr.Accordion("Ingestion Preview"):
-                        ingestion_df = gr.DataFrame()
-                    with gr.Accordion("Summarization Preview"):
-                        summarization_df = gr.DataFrame()
-                    with gr.Accordion("Single Shot Preview"):
-                        single_shot_df = gr.DataFrame()
-                    with gr.Accordion("Multi Hop Preview"):
-                        multi_hop_df = gr.DataFrame()
-                    with gr.Accordion("Lighteval Preview"):
-                        lighteval_df = gr.DataFrame()
-                stages_table.change(
-                    update_dataset,
-                    inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
-                    outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
-                )
-                stages_table.change(
-                    on_generation_succsess,
-                    inputs=stages_table,
-                    outputs=[tabs,btn_launch_evals],
-                )
-                # TODO: this timer should only be active when the second tab is passed to active for the first time
-                log_timer = gr.Timer(1.0, active=True)
-                log_timer.tick(
-                    MANAGERS.read_and_get_output,
-                    inputs=session_state,
-                    outputs=[log_output, stages_table],
-                )
-        with gr.Tab("Evaluate", id=2):
             with gr.Column():
-                gr.Markdown("### 🧪 Run YourBench Evaluation")
-                gr.Markdown("Run the full evaluation pipeline on the uploaded dataset. This includes computing metrics, creating the leaderboard, and pushing results.")
                 with gr.Row():
                     with gr.Column():

 **Dynamic Benchmark Generation for Language Models**
 Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
+- 💻 [Yourbench GitHub](https://github.com/huggingface/yourbench)
 """
 logger.remove()
     gr.Markdown(project_description)
     with gr.Tabs() as tabs:
+        with gr.Tab("Choose Documents & Settings", id=0):
+            with gr.Column():
+                gr.Markdown("### 📄 Choose your documents and settings")
+                gr.Markdown("Upload your source documents that will form the knowledge base for your benchmark. Set a Hugging Face organization and dataset name.")
+                gr.Markdown("This step also generates a config file for running the benchmark pipeline. You can download it to run YourBench locally.")
+                with gr.Row():
+                    with gr.Accordion("Hugging Face Settings"):
+                        login_btn = gr.LoginButton()
+                        hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
+                        app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
+                        hf_dataset_name = gr.Textbox(
+                            label="Dataset name",
+                            value="yourbench",
+                            info="Name of your new evaluation dataset",
+                        )
+                    with gr.Accordion("Upload Files"):
+                        file_input = gr.File(
+                            label="Upload text files",
+                            file_count="multiple",
+                            file_types=[".txt", ".md", ".html", ".pdf"],
+                        )
+                        output = gr.Textbox(label="Log")
+                        file_input.upload(
+                            save_files,
+                            inputs=[session_state, file_input],
+                            outputs=output,
+                        )
+                        delete_button = gr.Button("Delete Uploaded Files", visible=False)
+                preview_button = gr.Button("Generate New Config", interactive=False)
+                log_message = gr.Textbox(label="Log Message", visible=True)
+                download_button = gr.File(label="Download Config", visible=False, interactive=False)
+                file_input.change(
+                    lambda files: gr.update(visible=bool(files)),
+                    inputs=file_input,
+                    outputs=delete_button,
+                )
+                file_input.change(enable_button, inputs=file_input, outputs=preview_button)
+                def clean_and_confirm(uid):
+                    MANAGERS.clean_workdir(uid)
+                    return (
+                        "🗑️ All uploaded files have been deleted!",
+                        gr.update(value=None),
+                        gr.update(interactive=False),
+                    )
+                delete_button.click(
+                    clean_and_confirm,
+                    inputs=session_state,
+                    outputs=[output, file_input, preview_button],
+                )
+                preview_button.click(
+                    generate_and_return,
+                    inputs=[hf_org_dropdown, hf_dataset_name, session_state],
+                    outputs=[log_message, download_button],
+                )
+                preview_button.click(
+                    switch_to_run_generation_tab,
+                    inputs=None,
+                    outputs=tabs,
                 )
+        with gr.Tab("Run Benchmark Pipeline", id=1):
+            with gr.Column():
+                gr.Markdown("### ⚙️ Run the benchmark generation pipeline")
+                gr.Markdown("Start the pipeline to process documents, generate questions, and build the private evaluation dataset. Watch logs, track progress, and preview the results.")
+                with gr.Row():
+                    start_button = gr.Button("Start Task")
+                    stop_button = gr.Button("Stop Task")
+                    kill_button = gr.Button("Kill Task")
+                start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
+                stop_button.click(MANAGERS.stop_process, inputs=session_state)
+                kill_button.click(MANAGERS.kill_process, inputs=session_state)
+                process_status = gr.Checkbox(label="Process Status", interactive=False)
+                status_timer = gr.Timer(2.0, active=True)
+                status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
+                with gr.Row():
+                    with gr.Accordion("Stages", open=True):
+                        stages_table = gr.CheckboxGroup(
+                            choices=map_stage_names(STAGES),
+                            value=[],
+                            label="Pipeline Stages Completed",
+                            container=False,
+                            interactive=False,
+                        )
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Accordion("Log Output", open=True):
+                            log_output = gr.Code(language=None, lines=20, interactive=False)
+                    with gr.Column():
+                        with gr.Accordion("Ingestion Preview"):
+                            ingestion_df = gr.DataFrame()
+                        with gr.Accordion("Summarization Preview"):
+                            summarization_df = gr.DataFrame()
+                        with gr.Accordion("Single Shot Preview"):
+                            single_shot_df = gr.DataFrame()
+                        with gr.Accordion("Multi Hop Preview"):
+                            multi_hop_df = gr.DataFrame()
+                        with gr.Accordion("Lighteval Preview"):
+                            lighteval_df = gr.DataFrame()
+                    stages_table.change(
+                        update_dataset,
+                        inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
+                        outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
+                    )
+                    stages_table.change(
+                        on_generation_succsess,
+                        inputs=stages_table,
+                        outputs=[tabs,btn_launch_evals],
+                    )
+                    # TODO: this timer should only be active when the second tab is passed to active for the first time
+                    log_timer = gr.Timer(1.0, active=True)
+                    log_timer.tick(
+                        MANAGERS.read_and_get_output,
+                        inputs=session_state,
+                        outputs=[log_output, stages_table],
+                    )
+        with gr.Tab("Evaluate Models on Benchmark", id=2):
             with gr.Column():
+                gr.Markdown("### 🧪 Evaluate models on your benchmark")
+                gr.Markdown("Runs the evaluation with [Lighteval](https://github.com/huggingface/lighteval) on the resulted dataset using 5+ open models, then deploys a leaderboard as a Hugging Face Space under your org.")
                 with gr.Row():
                     with gr.Column():