yangzhitao committed
Commit 04c5cbd · 1 Parent(s): 9719e89

feat: enhance submission tab with detailed instructions and improved model information input fields

Files changed (2):
  1. app.py +218 -77
  2. src/about.py +17 -0
app.py CHANGED
@@ -14,6 +14,7 @@ from src.about import (
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
+    SUBMISSION_INSTRUCTIONS_TEXT,
     TITLE,
 )
 from src.backend.app import create_app
@@ -23,6 +24,7 @@ from src.display.css_html_js import (
     backend_status_js,
     custom_css,
 )
+from src.display.formatting import styled_error
 from src.display.utils import (
     BASE_COLS,
     BENCHMARK_COLS,
@@ -358,31 +360,53 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
         )
         with gr.Row():
             gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
         with gr.Row():
-            search_name = gr.Textbox(label="search model name", placeholder="user/model_name")
+            gr.Markdown(SUBMISSION_INSTRUCTIONS_TEXT, elem_classes="markdown-text")
 
-        with gr.Row():
-            table = gr.Dataframe(
-                headers=["Model Name", "Pipeline", "Downloads", "Likes"],
-                datatype=["str", "str", "number", "number"],
-                interactive=False,
-                wrap=True,
-                label="click model name to select",
-            )
+        # ========== Section 1: Steps 1-7 (Model Information) ==========
+        with gr.Accordion("📋 Model Information (Steps 1-7)", open=True):
+            with gr.Row():
+                search_name = gr.Textbox(label="1. Search model name", placeholder="org/model_name")
 
-        with gr.Row():
-            with gr.Column():
-                model_name_textbox = gr.Textbox(label="Model name", placeholder="user/model_name")
-                revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                _model_type = gr.Dropdown(
-                    choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                    label="Model type",
-                    multiselect=False,
-                    value=None,
-                    interactive=True,
+            with gr.Row():
+                table = gr.Dataframe(
+                    headers=["Model Name", "Pipeline", "Downloads", "Likes"],
+                    datatype=["str", "str", "number", "number"],
+                    interactive=False,
+                    wrap=True,
+                    label="2. Click model name to select",
                 )
 
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="2. Model name (auto-filled)", placeholder="org/model_name")
+                    revision_name_textbox = gr.Textbox(label="3. Revision commit (Optional)", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="4. Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="5. Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="6. Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(
+                        label="7. Base model (Optional, for delta or adapter weights)",
+                        placeholder="org/model_name",
+                    )
+
         def search_models(query):
             if not query.strip():
                 return []
@@ -401,6 +425,43 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
         search_name.change(fn=search_models, inputs=search_name, outputs=table)
         table.select(fn=on_select, inputs=table, outputs=model_name_textbox)
 
+        # ========== Section 2: Steps 8-10 (Benchmark Evaluation Results) ==========
+        with gr.Accordion("📊 Benchmark Evaluation Results (Steps 8-10)", open=True):
+            gr.Markdown(
+                "**8-9. Select benchmarks and fill in evaluation result values**\n\n"
+                "Fill in the form below to submit your benchmark evaluation results."
+            )
+
+            # Simple form for benchmark results
+            benchmarks = get_benchmarks()
+            benchmark_results_form = []
+            for benchmark in benchmarks:
+                with gr.Row():
+                    benchmark_checkbox = gr.Checkbox(
+                        label=f"{benchmark.title} ({benchmark.key})",
+                        value=False,
+                    )
+                    result_input = gr.Number(
+                        label="Result Value",
+                        value=None,
+                        interactive=True,
+                        visible=False,
+                    )
+
+                def make_visibility_fn(_result_comp):
+                    def toggle_visibility(checked):
+                        return gr.update(visible=checked)
+
+                    return toggle_visibility
+
+                benchmark_checkbox.change(
+                    fn=make_visibility_fn(result_input),
+                    inputs=benchmark_checkbox,
+                    outputs=result_input,
+                )
+                benchmark_results_form.append((benchmark, benchmark_checkbox, result_input))
+
+        # JSON upload as alternative
         def file_to_json_str(file) -> str:
             """
             Read the uploaded JSON file and return its contents as a string.
@@ -422,14 +483,15 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
                 raise gr.Error(f"Error reading JSON file: {str(e)}") from e
 
         json_upload = gr.File(
-            label="Upload JSON file",
+            label="Or upload JSON file (alternative to form above)",
            file_types=[".json"],
            type="filepath",
+            visible=False,
        )
        json_str = gr.Textbox(
            label="JSON Content",
-            placeholder="JSON content will appear here after upload",
-            lines=10,
+            placeholder='{"config": {...}, "results": {...}}',
+            lines=15,
            interactive=True,
            visible=False,
        )
@@ -439,61 +501,140 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
             outputs=json_str,
         )
 
-        with gr.Column():
-            precision = gr.Dropdown(
-                choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                label="Precision",
-                multiselect=False,
-                value="float16",
-                interactive=True,
-            )
-            weight_type = gr.Dropdown(
-                choices=[i.value.name for i in WeightType],
-                label="Weights type",
-                multiselect=False,
-                value="Original",
-                interactive=True,
-            )
-            base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            commit_textbox = gr.Textbox(label="Commits")
-
-        submit_button = gr.Button("Submit Eval")
-        submission_result = gr.Markdown()
-        submit_button.click(
-            add_new_submit,
-            [
-                model_name_textbox,
-                base_model_name_textbox,
-                revision_name_textbox,
-                precision,
-                weight_type,
-                json_str,
-                commit_textbox,
-            ],
-            submission_result,
-        )
+        # ========== Submit Section ==========
+        with gr.Row():
+            commit_textbox = gr.Textbox(label="Commit Message (Optional)", placeholder="Optional commit message")
+
+        def build_json_from_form(
+            model_name: str,
+            revision: str,
+            precision: str,
+            benchmark_checkbox_values: list,
+            benchmark_result_values: list,
+        ) -> str:
+            """Build JSON from form inputs"""
+            import json
+
+            if not model_name or not model_name.strip():
+                raise ValueError("Model name is required")
+
+            # Extract model_key from model_name (simple conversion)
+            model_key = model_name.lower().replace("/", "_").replace("-", "_")
+
+            # Build config
+            config = {
+                "model_name": model_name,
+                "model_key": model_key,
+                "model_dtype": f"torch.{precision}" if precision else None,
+                "model_sha": revision or "main",
+                "model_args": None,
+            }
+
+            # Build results: {benchmark_key: {metric: value}}
+            results = {}
+            benchmarks_list = get_benchmarks()
+            for benchmark, checkbox_checked, result_value in zip(
+                benchmarks_list, benchmark_checkbox_values, benchmark_result_values, strict=True
+            ):
+                if checkbox_checked and result_value is not None:
+                    # Use "acc" as the default metric (can be extended)
+                    results[benchmark.key] = {"acc": float(result_value)}
+
+            if not results:
+                raise ValueError("At least one benchmark result is required")
+
+            return json.dumps({"config": config, "results": results}, indent=2, ensure_ascii=False)
+
+        def submit_with_form_or_json(
+            model: str,
+            base_model: str,
+            revision: str,
+            precision: str,
+            weight_type: str,
+            model_type: str,  # Currently not used but kept for future use
+            json_str: str,
+            commit_message: str,
+            *benchmark_values,
+        ):
+            """Submit with either form data or JSON"""
+            import json
+
+            # If JSON is provided, use it; otherwise build from form
+            if json_str and json_str.strip():
+                try:
+                    # Validate JSON
+                    json.loads(json_str)
+                    final_json = json_str
+                except json.JSONDecodeError:
+                    return styled_error("Invalid JSON format. Please check your JSON content.")
+            else:
+                # Build JSON from form
+                # benchmark_values contains pairs of (checkbox_value, result_value) for each benchmark
+                benchmarks_list = get_benchmarks()
+                if len(benchmark_values) != len(benchmarks_list) * 2:
+                    return styled_error("Invalid benchmark form data. Please check your inputs.")
+
+                # Split into checkbox values and result values
+                benchmark_checkbox_values = [benchmark_values[i] for i in range(0, len(benchmark_values), 2)]
+                benchmark_result_values = [benchmark_values[i] for i in range(1, len(benchmark_values), 2)]
 
-        # Backend status indicator
-        backend_status = gr.HTML(
-            value=get_backend_status_undefined_html(),
-            elem_id="backend-status-container",
-        )
-        # trigger button to bind the click event
-        status_trigger = gr.Button(elem_id="backend-status-trigger-btn", visible=False)
-        status_trigger.click(
-            fn=lambda: check_backend_health()[1],
-            inputs=None,
-            outputs=backend_status,
-        )
-        # load external JavaScript file
-        js_content = backend_status_js()
-        status_trigger_js_html = f'<script>{js_content}</script>'
-        gr.HTML(status_trigger_js_html, visible=False)
-        demo.load(
-            fn=lambda: check_backend_health()[1],
-            inputs=None,
-            outputs=backend_status,
-        )
+                try:
+                    final_json = build_json_from_form(
+                        model, revision, precision, benchmark_checkbox_values, benchmark_result_values
+                    )
+                except ValueError as e:
+                    return styled_error(str(e))
+                except Exception as e:
+                    return styled_error(f"Error building JSON: {str(e)}")
+
+            # Call the submit function
+            return add_new_submit(model, base_model, revision, precision, weight_type, final_json, commit_message)
+
+        submit_button = gr.Button("10. Submit Eval", variant="primary")
+        submission_result = gr.Markdown()
+
+        # Collect all inputs for submission
+        all_inputs = [
+            model_name_textbox,
+            base_model_name_textbox,
+            revision_name_textbox,
+            precision,
+            weight_type,
+            model_type,
+            json_str,
+            commit_textbox,
+        ]
+        # Add benchmark form inputs
+        for _, checkbox, result_input in benchmark_results_form:
+            all_inputs.extend([checkbox, result_input])
+
+        submit_button.click(
+            fn=submit_with_form_or_json,
+            inputs=all_inputs,
+            outputs=submission_result,
+        )
+
+        # Backend status indicator
+        backend_status = gr.HTML(
+            value=get_backend_status_undefined_html(),
+            elem_id="backend-status-container",
+        )
+        # trigger button to bind the click event
+        status_trigger = gr.Button(elem_id="backend-status-trigger-btn", visible=False)
+        status_trigger.click(
+            fn=lambda: check_backend_health()[1],
+            inputs=None,
+            outputs=backend_status,
+        )
+        # load external JavaScript file
+        js_content = backend_status_js()
+        status_trigger_js_html = f'<script>{js_content}</script>'
+        gr.HTML(status_trigger_js_html, visible=False)
+        demo.load(
+            fn=lambda: check_backend_health()[1],
+            inputs=None,
+            outputs=backend_status,
+        )
 
 
 def main():
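A note on the wiring above: `submit_button.click` passes every checkbox and number value positionally, so `submit_with_form_or_json` receives them interleaved in `*benchmark_values`, and each checkbox only toggles the visibility of its own `Number` input (the role of the `make_visibility_fn` factory in the diff). A minimal runnable sketch of that pattern, with hypothetical benchmark keys standing in for `get_benchmarks()`:

```python
import gradio as gr

# Hypothetical benchmark keys standing in for get_benchmarks().
BENCHMARKS = ["vsi_bench", "mmmu"]

with gr.Blocks() as demo:
    pairs = []
    for key in BENCHMARKS:
        with gr.Row():
            checked = gr.Checkbox(label=key, value=False)
            score = gr.Number(label="Result Value", visible=False)
        # Each checkbox toggles only its own Number input; inputs/outputs
        # are bound per iteration, so every checkbox gets its own handler.
        checked.change(lambda c: gr.update(visible=c), inputs=checked, outputs=score)
        pairs.extend([checked, score])

    def on_submit(*values):
        # Gradio flattens the inputs list, so values arrives as
        # (checked_0, score_0, checked_1, score_1, ...).
        results = {
            key: {"acc": float(score)}
            for key, checked, score in zip(BENCHMARKS, values[0::2], values[1::2])
            if checked and score is not None
        }
        return str(results)

    out = gr.Markdown()
    gr.Button("Submit").click(on_submit, inputs=pairs, outputs=out)

demo.launch()
```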
src/about.py CHANGED
@@ -63,3 +63,20 @@ CITATION_BUTTON_TEXT = dedent("""
     year={2025}
 }
 """).strip()
+
+# --------------------------------------
+
+SUBMISSION_INSTRUCTIONS_TEXT = dedent("""
+## Submission Instructions
+
+1. Fill in the model name to search for on the HuggingFace Hub (e.g. `qwen/qwen3-vl-8b-instruct`).
+2. Select the model from the search results, and check the model name auto-filled below (e.g. `Qwen/Qwen3-VL-8B-Instruct`).
+3. (Optional) Fill in the revision commit of the model. If left empty, the latest `main` branch is used.
+4. Select the model type (e.g. `pretrained`).
+5. Select the precision of the model (e.g. `bfloat16`).
+6. Select the weights type of the model (defaults to `Original`).
+7. (Optional) Fill in the base model name for **delta** or **adapter** weights (e.g. `Qwen/Qwen3-VL-8B-Instruct`).
+8. Select a benchmark to evaluate on, and fill in the evaluation result value (e.g. `0.5` for `VSI-bench` `acc`).
+9. (Optional) Click the **[+]** button to add more benchmarks & evaluation result values.
+10. Click the **Submit Eval** button to submit the evaluation request.
+""")