openhands committed on
Commit
bb0cd90
·
1 Parent(s): fd4b87b

Clean up unused code, files, and assets

Browse files

Removed unused Python code:
- svg_to_data_uri() from leaderboard_transformer.py
- create_svg_html() from ui_components.py
- format_llm_base_with_html() from ui_components.py

Consolidated duplicate code:
- Renamed get_company_from_model_name to get_company_from_model in leaderboard_transformer.py
- Removed duplicate COMPANY_LOGO_MAP and get_company_from_model from ui_components.py
- ui_components.py and visualizations.py now import from leaderboard_transformer.py

Removed unused files:
- submission.py (submission functionality not exposed in app)
- submission_utils.py (only used by submission.py)
- generate_mock_jsonl.py (development utility)
- tests/integration/test_submission.py (tests for removed submission code)

Removed unused config:
- CONTACT_DATASET and SUBMISSION_DATASET from config.py

Removed unused CSS:
- Submission page CSS section from content.py

Removed 35 unused asset files:
- Various legend SVGs (api-*, c-*, os-*, custom-*, standard-*, equivalent-*)
- Category icons (code-execution, data-analysis, etc.)
- Star SVGs (three-point, four-point, five-point)
- Old logo files (openhands-logo.svg, logo-alibaba.svg, logo-huggingface.svg)
- Unused ellipse colors and other SVGs

assets/api-custom.svg DELETED
assets/api-equivalent.svg DELETED
assets/api-legend.svg DELETED
assets/api-standard.svg DELETED
assets/c-custom.svg DELETED
assets/c-equivalent.svg DELETED
assets/c-legend.svg DELETED
assets/c-standard.svg DELETED
assets/code-execution.svg DELETED
assets/custom-legend.svg DELETED
assets/data-analysis.svg DELETED
assets/ellipse-coral.svg DELETED
assets/ellipse-white.svg DELETED
assets/end-to-end-discovery.svg DELETED
assets/equivalent-legend.svg DELETED
assets/five-point-star.svg DELETED
assets/four-point-star.svg DELETED
assets/just-icon.svg DELETED
assets/literature-understanding.svg DELETED
assets/logo-alibaba.svg DELETED
assets/logo-huggingface.svg DELETED
assets/openhands-logo.svg DELETED
assets/openhands_mark_color_forwhite.png DELETED
Binary file (17.6 kB)
 
assets/os-custom.svg DELETED
assets/os-equivalent.svg DELETED
assets/os-legend.svg DELETED
assets/os-ow-custom.svg DELETED
assets/os-ow-equivalent.svg DELETED
assets/os-ow-legend.svg DELETED
assets/os-ow-standard.svg DELETED
assets/os-standard.svg DELETED
assets/overall.svg DELETED
assets/standard-legend.svg DELETED
assets/three-point-star.svg DELETED
assets/up-arrow.svg DELETED
config.py CHANGED
@@ -5,16 +5,10 @@ CONFIG_NAME = os.getenv("HF_CONFIG", "1.0.0-dev1") # This corresponds to 'config
5
  IS_INTERNAL = os.environ.get("IS_INTERNAL", "false").lower() == "true"
6
 
7
  # OpenHands Index datasets
8
- CONTACT_DATASET = f"OpenHands/openhands-index-contact-info"
9
-
10
  if IS_INTERNAL:
11
- # datasets backing the internal leaderboard
12
- SUBMISSION_DATASET = f"OpenHands/openhands-index-internal-submissions"
13
  RESULTS_DATASET = f"OpenHands/openhands-index-internal-results"
14
  LEADERBOARD_PATH = f"OpenHands/openhands-index-internal-leaderboard"
15
  else:
16
- # datasets backing the public leaderboard
17
- SUBMISSION_DATASET = f"OpenHands/openhands-index-submissions"
18
  RESULTS_DATASET = f"OpenHands/openhands-index-results"
19
  LEADERBOARD_PATH = f"OpenHands/openhands-index"
20
 
 
5
  IS_INTERNAL = os.environ.get("IS_INTERNAL", "false").lower() == "true"
6
 
7
  # OpenHands Index datasets
 
 
8
  if IS_INTERNAL:
 
 
9
  RESULTS_DATASET = f"OpenHands/openhands-index-internal-results"
10
  LEADERBOARD_PATH = f"OpenHands/openhands-index-internal-leaderboard"
11
  else:
 
 
12
  RESULTS_DATASET = f"OpenHands/openhands-index-results"
13
  LEADERBOARD_PATH = f"OpenHands/openhands-index"
14
 
content.py CHANGED
@@ -631,129 +631,7 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
631
  margin: 20px 0;
632
  max-width: 800px;
633
  }
634
- /*------ Submission Page CSS ------*/
635
- #submission-modal .modal-container,
636
- #success-modal .modal-container {
637
- height: auto;
638
- max-width: 600px;
639
- }
640
-
641
- #submission-modal-content,
642
- #success-modal .submission-modal-content {
643
- padding: 20px;
644
- background-color: inherit;
645
- border-radius: 8px;
646
- text-align: center;
647
- }
648
-
649
- #submission-modal-content p,
650
- #success-modal .submission-modal-content p {
651
- font-size: 16px;
652
- }
653
 
654
- #legal-modal-content {
655
- padding: 30px;
656
- background-color: inherit;
657
- border-radius: 8px;
658
- text-align: left;
659
- font-size: 14px;
660
- }
661
-
662
- #legal-modal-content h2 {
663
- text-align: center;
664
- }
665
- #legal-modal-content button {
666
- width: fit-content;
667
- }
668
- .spinner-container {
669
- display: flex;
670
- flex-direction: column;
671
- align-items: center;
672
- justify-content: center;
673
- padding: 30px;
674
- }
675
-
676
- .spinner {
677
- width: 50px;
678
- height: 50px;
679
- border: 5px solid #dee2e6;
680
- border-top: 5px solid #007bff;
681
- border-radius: 50%;
682
- animation: spin 1s linear infinite;
683
- margin-bottom: 20px;
684
- }
685
-
686
- @keyframes spin {
687
- 0% { transform: rotate(0deg); }
688
- 100% { transform: rotate(360deg); }
689
- }
690
-
691
- #submission-page-container {
692
- max-width: 800px;
693
- margin: 0 auto;
694
- }
695
-
696
- #submission-file-label {
697
- padding: 10px;
698
- }
699
-
700
- #submission-button {
701
- max-width: fit-content;
702
- font-size: 14px;
703
- }
704
-
705
- .custom-form-group {
706
- border: 1px solid #000 !important;
707
- border-radius: 4px !important;
708
- padding: 24px !important;
709
- overflow: visible !important;
710
- }
711
-
712
- #openness-label-html,
713
- #agent-tooling-label-html,
714
- #agent-info-label-html,
715
- #submitter-info-label-html,
716
- #username-label-html,
717
- #email-label-html,
718
- #role-label-html {
719
- padding-left: 12px;
720
- }
721
-
722
- .form-label {
723
- margin: 4px 0px 0px 6px;
724
- }
725
-
726
- .form-label-fieldset {
727
- padding-top: 10px !important;
728
- }
729
-
730
- #agent-tooling-label-html {
731
- padding-top: 6px;
732
- }
733
-
734
- .custom-form-group,
735
- .styler {
736
- background: none;
737
- }
738
-
739
- #feedback-button {
740
- display: inline-block;
741
- background-color: var(--color-primary-link);
742
- color: white;
743
- border: none;
744
- border-radius: 4px;
745
- padding: 15px 20px;
746
- font-size: 16px;
747
- cursor: pointer;
748
- transition: all 0.3s ease;
749
- text-decoration: none;
750
- }
751
-
752
- #feedback-button:hover {
753
- background-color: var(--color-button-hover);
754
- transform: translateY(-2px);
755
- box-shadow: 0 6px 12px rgba(0,0,0,0.3);
756
- }
757
  .dark #main-header h2 {
758
  color: var(--color-primary-accent);
759
  }
 
631
  margin: 20px 0;
632
  max-width: 800px;
633
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  .dark #main-header h2 {
636
  color: var(--color-primary-accent);
637
  }
generate_mock_jsonl.py DELETED
@@ -1,161 +0,0 @@
1
- """Generate mock results data in JSONL format for OpenHands Index."""
2
- import json
3
- import os
4
- from pathlib import Path
5
- from datetime import datetime
6
-
7
- # Define the 5 benchmarks
8
- BENCHMARKS = {
9
- "swe-bench": {
10
- "tags": ["swe-bench"],
11
- "metric": "resolve_rate",
12
- "metric_display": "Resolve Rate (%)"
13
- },
14
- "swe-bench-multimodal": {
15
- "tags": ["swe-bench-multimodal"],
16
- "metric": "resolve_rate",
17
- "metric_display": "Resolve Rate (%)"
18
- },
19
- "swt-bench": {
20
- "tags": ["swt-bench"],
21
- "metric": "success_rate",
22
- "metric_display": "Success Rate (%)"
23
- },
24
- "commit0": {
25
- "tags": ["commit0"],
26
- "metric": "test_pass_rate",
27
- "metric_display": "Test Pass Rate (%)"
28
- },
29
- "gaia": {
30
- "tags": ["gaia"],
31
- "metric": "accuracy",
32
- "metric_display": "Accuracy (%)"
33
- }
34
- }
35
-
36
- # Mock agents with realistic scores
37
- MOCK_AGENTS = [
38
- {
39
- "agent_name": "1.0.2",
40
- "llm_base": "claude-3-5-sonnet-20241022",
41
- "openness": "closed",
42
- "scores": {
43
- "swe-bench": 48.3,
44
- "swe-bench-multimodal": 42.1,
45
- "swt-bench": 65.4,
46
- "commit0": 71.2,
47
- "gaia": 58.7
48
- }
49
- },
50
- {
51
- "agent_name": "1.0.1",
52
- "llm_base": "gpt-4o-2024-11-20",
53
- "openness": "closed",
54
- "scores": {
55
- "swe-bench": 45.1,
56
- "swe-bench-multimodal": 39.5,
57
- "swt-bench": 62.3,
58
- "commit0": 68.9,
59
- "gaia": 55.2
60
- }
61
- },
62
- {
63
- "agent_name": "1.0.0",
64
- "llm_base": "gpt-4-turbo-2024-04-09",
65
- "openness": "closed",
66
- "scores": {
67
- "swe-bench": 38.7,
68
- "swe-bench-multimodal": 34.2,
69
- "swt-bench": 54.1,
70
- "commit0": 61.5,
71
- "gaia": 48.3
72
- }
73
- },
74
- {
75
- "agent_name": "0.9.5",
76
- "llm_base": "gpt-4o-mini-2024-07-18",
77
- "openness": "closed",
78
- "scores": {
79
- "swe-bench": 32.5,
80
- "swe-bench-multimodal": 28.9,
81
- "swt-bench": 47.8,
82
- "commit0": 55.3,
83
- "gaia": 42.1
84
- }
85
- },
86
- {
87
- "agent_name": "0.9.0",
88
- "llm_base": "claude-3-opus-20240229",
89
- "openness": "closed",
90
- "scores": {
91
- "swe-bench": 29.8,
92
- "swe-bench-multimodal": 25.7,
93
- "swt-bench": 44.2,
94
- "commit0": 52.1,
95
- "gaia": 39.4
96
- }
97
- },
98
- ]
99
-
100
-
101
- def generate_mock_data():
102
- """Generate mock JSONL files for all benchmarks."""
103
- output_dir = Path("mock_results/1.0.0-dev1")
104
- output_dir.mkdir(parents=True, exist_ok=True)
105
-
106
- # Create agenteval.json config
107
- config = {
108
- "suite_config": {
109
- "name": "openhands-index",
110
- "version": "1.0.0-dev1",
111
- "splits": []
112
- }
113
- }
114
-
115
- # Generate data for each benchmark
116
- for benchmark_name, benchmark_info in BENCHMARKS.items():
117
- print(f"Generating mock data for {benchmark_name}...")
118
-
119
- # Add to config
120
- config["suite_config"]["splits"].append({
121
- "name": benchmark_name,
122
- "tasks": [{
123
- "name": benchmark_name,
124
- "tags": benchmark_info["tags"]
125
- }]
126
- })
127
-
128
- # Generate JSONL file
129
- jsonl_path = output_dir / f"{benchmark_name}.jsonl"
130
- with open(jsonl_path, 'w') as f:
131
- for agent in MOCK_AGENTS:
132
- record = {
133
- "agent_name": agent["agent_name"],
134
- "llm_base": agent["llm_base"],
135
- "openness": agent["openness"],
136
- "score": agent["scores"][benchmark_name],
137
- "metric": benchmark_info["metric"],
138
- "submission_time": datetime.now().isoformat(),
139
- "tags": benchmark_info["tags"],
140
- # Additional metadata
141
- "cost_per_instance": round(0.1 + agent["scores"][benchmark_name] * 0.01, 4),
142
- "average_runtime": round(300 + agent["scores"][benchmark_name] * 5, 1),
143
- }
144
- f.write(json.dumps(record) + '\n')
145
-
146
- print(f" Created {jsonl_path}")
147
-
148
- # Write config file
149
- config_path = output_dir / "agenteval.json"
150
- with open(config_path, 'w') as f:
151
- json.dump(config, f, indent=2)
152
- print(f"\nCreated config: {config_path}")
153
-
154
- print("\n✓ Mock data generation complete!")
155
- print(f" Location: {output_dir}")
156
- print(f" Benchmarks: {', '.join(BENCHMARKS.keys())}")
157
- print(f" Agents: {len(MOCK_AGENTS)}")
158
-
159
-
160
- if __name__ == "__main__":
161
- generate_mock_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard_transformer.py CHANGED
@@ -128,7 +128,7 @@ def add_branding_to_figure(fig: go.Figure) -> go.Figure:
128
  return fig
129
 
130
 
131
- def get_company_from_model_name(model_name: str) -> dict:
132
  """
133
  Gets the company info (logo path and name) from a model name.
134
  Returns default unknown logo if no match found.
@@ -696,7 +696,7 @@ def _plot_scatter_plotly(
696
 
697
  for _, row in data_plot.iterrows():
698
  model_name = row.get('Language Model', '')
699
- company_info = get_company_from_model_name(model_name)
700
  logo_path = company_info['path']
701
 
702
  # Read the SVG file and encode as base64 data URI
@@ -994,16 +994,6 @@ def get_pareto_df(data, cost_col=None, score_col=None):
994
  return pd.DataFrame(pareto_points)
995
 
996
 
997
- def svg_to_data_uri(path: str) -> str:
998
- """Reads an SVG file and encodes it as a Data URI for Plotly."""
999
- try:
1000
- with open(path, "rb") as f:
1001
- encoded_string = base64.b64encode(f.read()).decode()
1002
- return f"data:image/svg+xml;base64,{encoded_string}"
1003
- except FileNotFoundError:
1004
- logger.warning(f"SVG file not found at: {path}")
1005
- return None
1006
-
1007
  def clean_llm_base_list(model_list):
1008
  """
1009
  Cleans a list of model strings by keeping only the text after the last '/'.
 
128
  return fig
129
 
130
 
131
+ def get_company_from_model(model_name: str) -> dict:
132
  """
133
  Gets the company info (logo path and name) from a model name.
134
  Returns default unknown logo if no match found.
 
696
 
697
  for _, row in data_plot.iterrows():
698
  model_name = row.get('Language Model', '')
699
+ company_info = get_company_from_model(model_name)
700
  logo_path = company_info['path']
701
 
702
  # Read the SVG file and encode as base64 data URI
 
994
  return pd.DataFrame(pareto_points)
995
 
996
 
 
 
 
 
 
 
 
 
 
 
997
  def clean_llm_base_list(model_list):
998
  """
999
  Cleans a list of model strings by keeping only the text after the last '/'.
submission.py DELETED
@@ -1,474 +0,0 @@
1
- import logging
2
- import typing
3
-
4
- import matplotlib
5
- matplotlib.use('Agg')
6
-
7
- import os
8
- import shutil
9
- import tarfile
10
- from datetime import datetime, timedelta, timezone
11
- from email.utils import parseaddr
12
-
13
- import gradio as gr
14
- import requests
15
- from huggingface_hub import HfApi
16
-
17
- import aliases
18
- from submission_utils import (
19
- SUBMISSION_METADATA_FILENAME,
20
- SubmissionMetadata,
21
- sanitize_path_component,
22
- _validate_path_component
23
- )
24
- from config import (
25
- CONFIG_NAME,
26
- CONTACT_DATASET,
27
- EXTRACTED_DATA_DIR,
28
- RESULTS_DATASET,
29
- SUBMISSION_DATASET,
30
- )
31
- from content import (
32
- CITATION_BUTTON_LABEL,
33
- CITATION_BUTTON_TEXT,
34
- LEGAL_DISCLAIMER_TEXT,
35
- SUBMISSION_CONFIRMATION,
36
- format_error,
37
- format_log,
38
- format_warning,
39
- )
40
- from ui_components import build_openness_tooltip_content, build_tooling_tooltip_content
41
-
42
- # Simple stubs for dataset functionality (not using HF datasets)
43
- class DatasetDict(dict):
44
- """Simple stub for datasets.DatasetDict."""
45
- pass
46
-
47
- class Dataset:
48
- """Simple stub for datasets.Dataset."""
49
- @staticmethod
50
- def from_list(data):
51
- return Dataset()
52
-
53
- def load_dataset(*args, **kwargs):
54
- """Simple stub for datasets.load_dataset."""
55
- return DatasetDict()
56
-
57
- class EmptyDatasetError(Exception):
58
- """Simple stub for datasets exception."""
59
- pass
60
-
61
- class DataFilesNotFoundError(Exception):
62
- """Simple stub for datasets exception."""
63
- pass
64
-
65
-
66
- logger = logging.getLogger(__name__)
67
- logger.setLevel(logging.DEBUG)
68
-
69
- api = HfApi()
70
- MAX_UPLOAD_BYTES = 5e9
71
- os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
72
-
73
- # --- Submission Logic (largely unchanged from original, ensure LeaderboardSubmission and other deps are fine) ---
74
- def try_load_dataset_submission(*args, **kwargs) -> DatasetDict: # Renamed to avoid conflict if LV has one
75
- try:
76
- return load_dataset(*args, **kwargs)
77
- except EmptyDatasetError:
78
- return DatasetDict()
79
- except ValueError: # Handles cases where dataset is empty or ill-formed
80
- return DatasetDict()
81
- except DataFilesNotFoundError:
82
- return DatasetDict()
83
-
84
- def upload_submission(
85
- folder_path: str,
86
- split: str,
87
- submission_name: str,
88
- hf_username: str,
89
- ) -> str:
90
- total = 0
91
- for root, _, files in os.walk(folder_path):
92
- for f_ul in files:
93
- total += os.path.getsize(os.path.join(root, f_ul))
94
- if total > MAX_UPLOAD_BYTES:
95
- raise ValueError(
96
- f"Upload too large: exceeds {MAX_UPLOAD_BYTES // 1000000} MB limit."
97
- )
98
-
99
- # This is a copy of agenteval.upload.upload_folder_to_hf so we can use other api params.
100
- # TODO in agenteval: When you mildly wrap another library call, always pass *args, **kwargs.
101
- _validate_path_component(CONFIG_NAME, "config_name")
102
- _validate_path_component(split, "split")
103
- _validate_path_component(submission_name, "submission_name")
104
- dataset_url = f"hf://datasets/{SUBMISSION_DATASET}/{CONFIG_NAME}/{split}/{submission_name}"
105
- logger.info(f"Uploading dataset {dataset_url}")
106
- api.upload_folder(
107
- folder_path=folder_path,
108
- path_in_repo=f"{CONFIG_NAME}/{split}/{submission_name}",
109
- repo_id=SUBMISSION_DATASET,
110
- repo_type="dataset",
111
- # Reminder: This may be going into a public dataset.
112
- # Don't put private information in commit message such as email.
113
- commit_message=f'Submission from hf user "{hf_username}" to "{dataset_url}"',
114
- )
115
- return dataset_url
116
-
117
- def show_loading_spinner():
118
- return gr.update(visible=True)
119
-
120
- def add_new_eval(
121
- val_or_test: str,
122
- agent_name: str | None,
123
- agent_description: str,
124
- agent_url: str,
125
- openness: str | None,
126
- degree_of_control: str | None,
127
- path_to_file: typing.IO | None,
128
- username: str,
129
- role: str,
130
- email: str,
131
- email_opt_in: bool,
132
- profile: gr.OAuthProfile,
133
- ):
134
- if not agent_name:
135
- return (
136
- format_warning("Please provide an agent name."), # error_message
137
- gr.update(visible=True), # error_modal
138
- gr.update(visible=False), # success_modal
139
- gr.update(visible=False) # loading_modal
140
- )
141
-
142
- if path_to_file is None:
143
- return (
144
- format_warning("Please attach a .tar.gz file."), # error_message
145
- gr.update(visible=True), # error_modal
146
- gr.update(visible=False), # success_modal
147
- gr.update(visible=False) # loading_modal
148
- )
149
-
150
- logger.info(f"agent {agent_name}: Checking submission")
151
-
152
- # Load current eval_results for submission checks
153
- # This is a bit redundant if display part reloads it, but submission needs its own consistent view
154
- current_eval_results_for_submission = try_load_dataset_submission(
155
- RESULTS_DATASET,
156
- CONFIG_NAME,
157
- download_mode="force_redownload", # Or a less aggressive mode
158
- verification_mode=VerificationMode.NO_CHECKS,
159
- )
160
-
161
- submission_time = datetime.now(timezone.utc)
162
- if not username or username.strip() == "":
163
- username = profile.username # Default to HF username
164
-
165
- logger.debug(f"agent {agent_name}: User account age check {profile.username}")
166
- try:
167
- # Account age check disabled for launch.
168
- # https://github.com/allenai/astabench-issues/issues/419
169
- # if _is_hf_acct_too_new(submission_time, profile.username):
170
- # return (
171
- # format_error("This account is not authorized to submit here (account too new)."), # error_message
172
- # gr.update(visible=True), # error_modal
173
- # gr.update(visible=False), # success_modal
174
- # gr.update(visible=False) # loading_modal
175
- # )
176
- pass
177
- except Exception as e:
178
- logger.warning(f"Error checking user account age: {e}")
179
- return (
180
- format_error("Could not verify account age. Please try again later."), # error_message
181
- gr.update(visible=True), # error_modal
182
- gr.update(visible=False), # success_modal
183
- gr.update(visible=False) # loading_modal
184
- )
185
-
186
- logger.debug(f"agent {agent_name}: Submission frequency check {profile.username}")
187
- contact_infos = try_load_dataset_submission(
188
- CONTACT_DATASET, CONFIG_NAME, download_mode="force_redownload",
189
- verification_mode=VerificationMode.NO_CHECKS
190
- )
191
- if _is_last_submission_too_recent(
192
- contact_rows=contact_infos.get(val_or_test, []),
193
- username=profile.username,
194
- submission_time=submission_time,
195
- ):
196
- logger.info(f"agent {agent_name}: Denied submission because user {username} submitted recently")
197
- return (
198
- format_error("You already submitted once in the last 24h for this split; please try again later."), # error_message
199
- gr.update(visible=True), # error_modal
200
- gr.update(visible=False), # success_modal
201
- gr.update(visible=False) # loading_modal
202
- )
203
-
204
- logger.debug(f"agent {agent_name}: Email validation {email}")
205
- _, parsed_mail = parseaddr(email)
206
- if "@" not in parsed_mail:
207
- return (
208
- format_warning("Please provide a valid email address."), # error_message
209
- gr.update(visible=True), # error_modal
210
- gr.update(visible=False), # success_modal
211
- gr.update(visible=False) # loading_modal
212
- )
213
-
214
- logger.debug(f"agent {agent_name}: Duplicate submission check")
215
- if val_or_test in current_eval_results_for_submission and len(current_eval_results_for_submission[val_or_test]) > 0:
216
- existing_submissions = current_eval_results_for_submission[val_or_test].to_dict().get("submission", [])
217
- for sub_item in existing_submissions:
218
- if (sub_item.get("agent_name", "").lower() == agent_name.lower() and
219
- sub_item.get("username", "").lower() == username.lower()):
220
- return (
221
- format_warning("This agent name by this user has already been submitted to this split."), # error_message
222
- gr.update(visible=True), # error_modal
223
- gr.update(visible=False), # success_modal
224
- gr.update(visible=False) # loading_modal
225
- )
226
-
227
- safe_username = sanitize_path_component(username)
228
- safe_agent_name = sanitize_path_component(agent_name)
229
- extracted_dir = os.path.join(EXTRACTED_DATA_DIR, f"{safe_username}_{safe_agent_name}")
230
-
231
- logger.debug(f"agent {agent_name}: File extraction to {extracted_dir}")
232
- try:
233
- if os.path.exists(extracted_dir): shutil.rmtree(extracted_dir)
234
- os.makedirs(extracted_dir, exist_ok=True)
235
- with tarfile.open(path_to_file.name, "r:gz") as tar:
236
- members_extracted = 0
237
- for member in tar.getmembers():
238
- if not member.isreg(): continue
239
- fname = os.path.basename(member.name)
240
- if not fname or fname.startswith("."): continue
241
- fobj = tar.extractfile(member)
242
- if not fobj: continue
243
- with open(os.path.join(extracted_dir, fname), "wb") as out:
244
- out.write(fobj.read())
245
- members_extracted +=1
246
- if members_extracted == 0:
247
- return (
248
- format_error("Submission tarball is empty or contains no valid files."), # error_message
249
- gr.update(visible=True), # error_modal
250
- gr.update(visible=False), # success_modal
251
- gr.update(visible=False) # loading_modal
252
- )
253
- except Exception as e:
254
- return (
255
- format_error(f"Error extracting file: {e}. Ensure it's a valid .tar.gz."), # error_message
256
- gr.update(visible=True), # error_modal
257
- gr.update(visible=False), # success_modal
258
- gr.update(visible=False) # loading_modal
259
- )
260
-
261
- submission_name = f"{safe_username}_{safe_agent_name}_{submission_time.strftime('%Y-%m-%d_%H-%M-%S')}"
262
-
263
- logger.debug(f"agent {agent_name}: Generate submission.json")
264
- subm_meta = SubmissionMetadata(
265
- agent_name=agent_name,
266
- agent_description=agent_description,
267
- agent_url=agent_url,
268
- openness=openness,
269
- tool_usage=degree_of_control,
270
- username=username,
271
- submit_time=submission_time,
272
- )
273
- with open(os.path.join(extracted_dir, SUBMISSION_METADATA_FILENAME), "w", encoding="utf-8") as fp:
274
- fp.write(subm_meta.model_dump_json(indent=2))
275
-
276
- logger.info(f"agent {agent_name}: Upload raw (unscored) submission files")
277
- try:
278
- dataset_url = upload_submission(extracted_dir, val_or_test, submission_name, profile.username)
279
- except ValueError as e:
280
- return (
281
- format_error(str(e)), # error_message
282
- gr.update(visible=True), # error_modal
283
- gr.update(visible=False), # success_modal
284
- gr.update(visible=False) # loading_modal
285
- )
286
- except Exception as e:
287
- return (
288
- format_error(f"Failed to upload raw submission: {e}"), # error_message
289
- gr.update(visible=True), # error_modal
290
- gr.update(visible=False), # success_modal
291
- gr.update(visible=False) # loading_modal
292
- )
293
-
294
- logger.info(f"agent {agent_name}: Save contact information")
295
- contact_info = subm_meta.model_dump()
296
- contact_info["username_auth"] = profile.username
297
- contact_info["email"] = email
298
- contact_info["email_opt_in"] = email_opt_in
299
- contact_info["role"] = role
300
- contact_info["dataset_url"] = dataset_url
301
-
302
- logger.debug(f"agent {agent_name}: Contact info: {contact_info}")
303
- if val_or_test in contact_infos:
304
- contact_infos[val_or_test] = contact_infos[val_or_test].add_item(contact_info)
305
- else:
306
- contact_infos[val_or_test] = Dataset.from_list([contact_info])
307
-
308
- try:
309
- contact_infos.push_to_hub(
310
- repo_id=CONTACT_DATASET,
311
- config_name=CONFIG_NAME,
312
- commit_message=f'Submission from hf user "{profile.username}" to "{dataset_url}"',
313
- )
314
- except Exception as e:
315
- return (
316
- format_error(f"Submission recorded, but contact info failed to save: {e}"), # error_message
317
- gr.update(visible=True), # error_modal
318
- gr.update(visible=False), # success_modal
319
- gr.update(visible=False) # loading_modal
320
- )
321
-
322
- logger.info(f"Agent '{agent_name}' submitted successfully by '{username}' to '{val_or_test}' split.")
323
- return (
324
- "", # message
325
- gr.update(visible=False), # error_modal
326
- gr.update(visible=True), # success_modal
327
- gr.update(visible=False) # loading_modal
328
- )
329
-
330
-
331
- def _is_hf_acct_too_new(submission_time: datetime, username: str):
332
- user_data_resp = requests.get(f"https://huggingface.co/api/users/{username}/overview")
333
- user_data_resp.raise_for_status()
334
- creation_date_str = user_data_resp.json()["createdAt"]
335
- created_at = datetime.strptime(creation_date_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
336
- return submission_time - created_at < timedelta(days=60)
337
-
338
-
339
- def _is_last_submission_too_recent(contact_rows, username, submission_time):
340
- user_submission_dates = sorted(
341
- row["submit_time"] for row in contact_rows if row["username_auth"] == username
342
- )
343
- return user_submission_dates and (submission_time - user_submission_dates[-1] < timedelta(days=1))
344
-
345
-
346
- openness_label_html = f"""<div>
347
- <b>Agent Openness</b>
348
- {build_openness_tooltip_content()}
349
- </div>"""
350
-
351
-
352
- agent_tooling_label_html = f"""<div>
353
- <b>Agent Tooling</b>
354
- {build_tooling_tooltip_content()}
355
- </div>"""
356
-
357
-
358
- heading_html = """
359
- <h2>🚀 Submit an agent for evaluation</h2>
360
- <p>Submit your agent to OpenHands Index for evaluation on real-world scientific tasks. Once submitted, your run will be reviewed by our team. If there are any issues, we’ll reach out within 5–7 business days. We’re working toward full automation, but in the meantime, human review helps ensure quality and trust.</p>
361
- <h3>How to run an evaluation</h3>
362
- <p>Please follow the steps in our <a href="https://github.com/allenai/asta-bench?tab=readme-ov-file#usage" target="_blank">README</a>. You’ll upload your run file at the end of this form.</p>
363
- """
364
-
365
- # --- Submission Accordion ---
366
- def build_page():
367
- with gr.Column(elem_id="submission-page-container"):
368
- gr.HTML(heading_html)
369
- gr.LoginButton()
370
- with gr.Group(elem_classes="custom-form-group"):
371
- gr.HTML(value="""<h2>Submitter Information</h2>""", elem_id="submitter-info-label-html")
372
- gr.HTML(value="""<h3>Username</h3>""", elem_classes="form-label")
373
- username_tb = gr.Textbox(label="This will show on the leaderboard. By default, we’ll use your Hugging Face username; but you can enter your organization name instead (e.g., university, company, or lab).")
374
- gr.HTML(value="""<h3>Role</h3>""", elem_classes="form-label")
375
- role = gr.Dropdown(label="Please select the role that most closely matches your current position. Helps us improve OpenHands Index for different user types. Not displayed on the leaderboard.",
376
- interactive=True,
377
- choices=[
378
- "Undergraduate Student",
379
- "Masters Student",
380
- "PhD Student",
381
- "Postdoctoral Researcher",
382
- "Academic Faculty (e.g., Professor, Lecturer)",
383
- "Industry Researcher (e.g., Research Scientist, Applied Scientist)",
384
- "Engineer or Developer (e.g., Software or ML Engineer)",
385
- "Data Scientist or Analyst",
386
- "Product or Program Manager",
387
- "Startup Founder or Independent Researcher",
388
- "Other"
389
- ])
390
- gr.HTML(value="""<h3>Contact email</h3>""", elem_classes="form-label")
391
- mail_tb = gr.Textbox(label="We'll only use your email to communicate about your submission.")
392
- mail_opt_in = gr.Checkbox(label="I’m open to being contacted by email for user research studies or feedback opportunities.")
393
- with gr.Group(elem_classes="custom-form-group"):
394
- gr.HTML(value="""<h2>Agent Information</h2>""", elem_id="agent-info-label-html")
395
- gr.HTML(value="""<h3>Split</h3>""", elem_classes="form-label")
396
- level_of_test_radio = gr.Radio(choices=[
397
- ("Test set", "test"),
398
- ("Validation set", "validation"),
399
- ], elem_classes="form-label-fieldset", value="validation", label="The Test Set is used for final leaderboard rankings. The Validation Set is for development and iteration. Choose based on your evaluation goal.")
400
- gr.HTML(value="""<h3>Agent name</h3>""", elem_classes="form-label")
401
- agent_name_tb = gr.Textbox(label="This is how your agent will appear on the leaderboard. Use a clear, descriptive name (e.g., Asta Scholar QA, Perplexity Deep Research). Omit model names (e.g. GPT-4, Mistral) as they’ll be shown automatically based on your logs.")
402
- gr.HTML(value="""<h3>Agent description</h3>""", elem_classes="form-label")
403
- agent_desc_tb = gr.Textbox(label="Briefly describe your agent’s approach, core strategies, or what makes it distinct. This description may appear on the leaderboard.")
404
- gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
405
- agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
406
- gr.HTML(value=openness_label_html, elem_classes="form-label")
407
- openness_radio = gr.Radio([aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS, aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS, aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE, aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY], elem_classes="form-label-fieldset", value=None, label="This affects how your submission is categorized on the leaderboard. Choose based on the availability of your code, model weights, or APIs.")
408
- gr.HTML(value=agent_tooling_label_html, elem_classes="form-label")
409
- degree_of_control_radio = gr.Radio([aliases.CANONICAL_TOOL_USAGE_STANDARD, aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE, aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM], elem_classes="form-label-fieldset",value=None, label="Choose based on the tools and the execution environment your agent used during evaluation.")
410
- gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
411
- gr.HTML("<div id='submission-file-label'>Upload your run as a .tar.gz archive prepared using the steps in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
412
- file_upload_comp = gr.File(
413
- show_label=False,
414
- file_types=[".gz", ".tar.gz"],
415
- )
416
- submit_eval_button = gr.Button("Submit Evaluation", elem_id="submission-button")
417
- # Modals for loading spinner, success and error messages
418
- with Modal(visible=False, elem_id="submission-modal") as loading_modal:
419
- with gr.Column(elem_id="submission-modal-content"):
420
- gr.HTML('<div class="spinner-container"><div class="spinner"></div><p>Processing your submission...</p></div>')
421
-
422
- with Modal(visible=False, elem_id="submission-modal") as error_modal:
423
- with gr.Column(elem_id="submission-modal-content"):
424
- gr.Markdown("## ⚠️ Error")
425
- error_message = gr.Markdown()
426
-
427
- with Modal(visible=False, elem_id="success-modal") as success_modal:
428
- with gr.Column(elem_id="submission-modal-content"):
429
- gr.Markdown(SUBMISSION_CONFIRMATION)
430
- with Modal(visible=False, elem_id="submission-modal") as disclaimer_modal:
431
- with gr.Column(elem_id="legal-modal-content"):
432
- gr.HTML(LEGAL_DISCLAIMER_TEXT)
433
- with gr.Row():
434
- agree_button = gr.Button("I agree to the terms and conditions above", variant="primary")
435
-
436
- def accept_and_load():
437
- return [
438
- gr.update(visible=False), # Hide disclaimer_modal
439
- gr.update(visible=True) # Show loading_modal
440
- ]
441
-
442
- def show_disclaimer():
443
- return gr.update(visible=True)
444
-
445
- submit_eval_button.click(
446
- fn=show_disclaimer,
447
- inputs=None,
448
- outputs=[disclaimer_modal]
449
- )
450
-
451
- agree_button.click(
452
- fn=accept_and_load,
453
- inputs=None,
454
- outputs=[disclaimer_modal, loading_modal],
455
- ).then(
456
- fn=add_new_eval,
457
- inputs=[
458
- level_of_test_radio,
459
- agent_name_tb,
460
- agent_desc_tb,
461
- agent_url_tb,
462
- openness_radio,
463
- degree_of_control_radio,
464
- file_upload_comp,
465
- username_tb,
466
- role,
467
- mail_tb,
468
- mail_opt_in
469
- ],
470
- outputs=[error_message, error_modal, success_modal, loading_modal],
471
- )
472
- # hiding this for now till we have the real paper data
473
- # with gr.Accordion("📙 Citation", open=False):
474
- # gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button-main", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
submission_utils.py DELETED
@@ -1,88 +0,0 @@
1
- """
2
- Utilities for submission handling, replacing agent-eval dependencies.
3
- """
4
- import re
5
- from pathlib import Path
6
- from typing import Optional
7
-
8
-
9
- # Constants
10
- SUBMISSION_METADATA_FILENAME = "metadata.json"
11
-
12
-
13
- # Simple SubmissionMetadata class
14
- class SubmissionMetadata:
15
- """Simple metadata for submissions."""
16
- def __init__(self, **kwargs):
17
- self.agent_name = kwargs.get("agent_name", "")
18
- self.llm_base = kwargs.get("llm_base", "")
19
- self.openness = kwargs.get("openness", "")
20
- self.tool_usage = kwargs.get("tool_usage", "")
21
- self.submitter_name = kwargs.get("submitter_name", "")
22
- self.submitter_email = kwargs.get("submitter_email", "")
23
-
24
- def to_dict(self):
25
- return {
26
- "agent_name": self.agent_name,
27
- "llm_base": self.llm_base,
28
- "openness": self.openness,
29
- "tool_usage": self.tool_usage,
30
- "submitter_name": self.submitter_name,
31
- "submitter_email": self.submitter_email,
32
- }
33
-
34
-
35
- # Path validation functions
36
- def _validate_path_component(component: str, allow_underscores: bool = True) -> None:
37
- """
38
- Validate a single path component.
39
-
40
- Args:
41
- component: The path component to validate
42
- allow_underscores: Whether to allow underscores in the component
43
-
44
- Raises:
45
- ValueError: If the component is invalid
46
- """
47
- if not component:
48
- raise ValueError("Path component cannot be empty")
49
-
50
- if component in (".", ".."):
51
- raise ValueError(f"Path component cannot be '{component}'")
52
-
53
- # Check for invalid characters
54
- pattern = r'^[a-zA-Z0-9_\-\.]+$' if allow_underscores else r'^[a-zA-Z0-9\-\.]+$'
55
- if not re.match(pattern, component):
56
- raise ValueError(
57
- f"Path component '{component}' contains invalid characters. "
58
- f"Only alphanumeric, hyphens, dots{', and underscores' if allow_underscores else ''} are allowed."
59
- )
60
-
61
-
62
- def sanitize_path_component(component: str, replacement: str = "_") -> str:
63
- """
64
- Sanitize a path component by replacing invalid characters.
65
-
66
- Args:
67
- component: The path component to sanitize
68
- replacement: The character to use for replacing invalid characters
69
-
70
- Returns:
71
- Sanitized path component
72
- """
73
- if not component:
74
- return "unnamed"
75
-
76
- # Replace any non-alphanumeric, non-hyphen, non-dot, non-underscore with replacement
77
- sanitized = re.sub(r'[^a-zA-Z0-9_\-\.]', replacement, component)
78
-
79
- # Remove leading/trailing dots or hyphens
80
- sanitized = sanitized.strip('.-')
81
-
82
- # Collapse multiple replacements into one
83
- sanitized = re.sub(f'{re.escape(replacement)}+', replacement, sanitized)
84
-
85
- if not sanitized:
86
- return "unnamed"
87
-
88
- return sanitized
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/integration/test_submission.py DELETED
@@ -1,108 +0,0 @@
1
- import json
2
- import os
3
- from datetime import datetime
4
-
5
- import gradio
6
- import pytest
7
- from huggingface_hub import HfApi, hf_hub_download
8
-
9
- from aliases import CANONICAL_TOOL_USAGE_STANDARD, CANONICAL_OPENNESS_CLOSED_UI_ONLY
10
- from config import IS_INTERNAL, CONFIG_NAME, CONTACT_DATASET, SUBMISSION_DATASET
11
- from submission import add_new_eval
12
- from submission_utils import SubmissionMetadata
13
-
14
- _hf = HfApi()
15
-
16
-
17
- class TestSubmission:
18
- @pytest.fixture(autouse=True)
19
- def setup(self):
20
- # These need to be set before imports are evaluated so all we can do here
21
- # is check that they have been set correctly.
22
- assert IS_INTERNAL == True
23
- assert CONFIG_NAME == "continuous-integration"
24
-
25
- def test_add_new_eval(self, mocker):
26
- # Bypass some checks so that the test can cover later parts of the code.
27
- mocker.patch("submission._is_hf_acct_too_new", return_value=False)
28
- mocker.patch("submission._is_last_submission_too_recent", return_value=False)
29
-
30
- # We use this to find records corresponding to this test.
31
- agent_description = f"CI run at {datetime.now().isoformat()}"
32
- print(f"Using unique agent description: {agent_description}")
33
-
34
- print("Submitting test submission...")
35
- with open(os.path.join(os.path.dirname(__file__), "test-submission.tar.gz"), "rb") as f:
36
- result = add_new_eval(
37
- val_or_test="test",
38
- agent_name="TestSubmissionIntegration",
39
- agent_description=agent_description,
40
- agent_url="https://github.com/allenai/asta-bench-leaderboard/blob/main/tests/integration/test_submission.py",
41
- openness=CANONICAL_OPENNESS_CLOSED_UI_ONLY,
42
- degree_of_control=CANONICAL_TOOL_USAGE_STANDARD,
43
- path_to_file=f,
44
- username="test_user",
45
- role="Other",
46
- email="jasond+asta_testing@allenai.org",
47
- email_opt_in=True,
48
- profile=gradio.OAuthProfile({
49
- "name": "Test User",
50
- "preferred_username": "test_user",
51
- "profile": "test_user_profile",
52
- "picture": "https://placecats.com/150/150",
53
- }),
54
- )
55
-
56
- message, error_modal, success_modal, loading_modal = result
57
- assert message == "" # Success
58
- assert error_modal == {'__type__': 'update', 'visible': False}
59
- assert success_modal == {'__type__': 'update', 'visible': True}
60
- assert loading_modal == {'__type__': 'update', 'visible': False}
61
-
62
- print("Looking up contact record...")
63
- contacts = load_dataset(path=CONTACT_DATASET,
64
- name=CONFIG_NAME,
65
- download_mode="force_redownload",
66
- verification_mode=VerificationMode.NO_CHECKS)
67
- # There should have been a new entry due to this test with our unique description.
68
- found_contact = next(row for row in contacts['test'] if row['agent_description'] == agent_description)
69
- assert found_contact
70
-
71
- # This contains an attribute that should lead us to files in the submissions dataset.
72
- dataset_url = found_contact['dataset_url']
73
- print(f"Found dataset URL: {dataset_url}")
74
- assert dataset_url.startswith(
75
- "hf://datasets/allenai/asta-bench-internal-submissions/continuous-integration/test/")
76
-
77
- print("Checking submission dataset...")
78
- # Commit message itself should link this and the contact record together unambiguously.
79
- recent_commits = _hf.list_repo_commits(repo_type="dataset", repo_id=SUBMISSION_DATASET)
80
- assert any(dataset_url in c.title for c in recent_commits)
81
-
82
- print("Checking that files are present...")
83
- rel_path = dataset_url[len("hf://datasets/allenai/asta-bench-internal-submissions/"):]
84
- ds_info = _hf.dataset_info(SUBMISSION_DATASET)
85
- # These are the files in our test-submission.tar.gz
86
- assert any(f"{rel_path}/eval_config.json" == f.rfilename for f in ds_info.siblings)
87
- assert any(f"{rel_path}/task_sqa_solver_openscilm.eval" == f.rfilename for f in ds_info.siblings)
88
- # This is the generated metadata put into the dataset itself.
89
- assert any(f"{rel_path}/submission.json" == f.rfilename for f in ds_info.siblings)
90
-
91
- print("Checking contact record against submission.json...")
92
- # Checks on contact record which is stored in a private dataset.
93
- local_path = hf_hub_download(repo_type="dataset",
94
- repo_id=SUBMISSION_DATASET,
95
- filename=f"{rel_path}/submission.json")
96
- with open(local_path) as f:
97
- contact_from_json = json.load(f)
98
- # Assert that all keys and values in submission.json are present in the contact record
99
- for key, value_from_json in contact_from_json.items():
100
- value_from_dataset = found_contact[key]
101
- if isinstance(value_from_dataset, datetime):
102
- value_from_dataset = found_contact[key].isoformat().replace('+00:00', 'Z')
103
- assert value_from_dataset == value_from_json
104
- # submission.json should not contain sensitive PII, specifically, email.
105
- assert 'email' in found_contact
106
- assert 'email' not in contact_from_json
107
- # submission.json is defined by a specific data model.
108
- SubmissionMetadata.model_validate(contact_from_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ui_components.py CHANGED
@@ -20,6 +20,8 @@ from leaderboard_transformer import (
20
  format_runtime_column,
21
  get_pareto_df,
22
  clean_llm_base_list,
 
 
23
  )
24
  from config import (
25
  CONFIG_NAME,
@@ -39,56 +41,6 @@ from content import (
39
  api = HfApi()
40
  os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
41
 
42
- # Company logo mapping - maps model name patterns to company logo files
43
- COMPANY_LOGO_MAP = {
44
- "anthropic": {"path": "assets/logo-anthropic.svg", "name": "Anthropic"},
45
- "claude": {"path": "assets/logo-anthropic.svg", "name": "Anthropic"},
46
- "openai": {"path": "assets/logo-openai.svg", "name": "OpenAI"},
47
- "gpt": {"path": "assets/logo-openai.svg", "name": "OpenAI"},
48
- "o1": {"path": "assets/logo-openai.svg", "name": "OpenAI"},
49
- "o3": {"path": "assets/logo-openai.svg", "name": "OpenAI"},
50
- "google": {"path": "assets/logo-google.svg", "name": "Google"},
51
- "gemini": {"path": "assets/logo-google.svg", "name": "Google"},
52
- "gemma": {"path": "assets/logo-google.svg", "name": "Google"},
53
- "meta": {"path": "assets/logo-meta.svg", "name": "Meta"},
54
- "llama": {"path": "assets/logo-meta.svg", "name": "Meta"},
55
- "mistral": {"path": "assets/logo-mistral.svg", "name": "Mistral"},
56
- "mixtral": {"path": "assets/logo-mistral.svg", "name": "Mistral"},
57
- "codestral": {"path": "assets/logo-mistral.svg", "name": "Mistral"},
58
- "deepseek": {"path": "assets/logo-deepseek.svg", "name": "DeepSeek"},
59
- "xai": {"path": "assets/logo-xai.svg", "name": "xAI"},
60
- "grok": {"path": "assets/logo-xai.svg", "name": "xAI"},
61
- "cohere": {"path": "assets/logo-cohere.svg", "name": "Cohere"},
62
- "command": {"path": "assets/logo-cohere.svg", "name": "Cohere"},
63
- "qwen": {"path": "assets/logo-qwen.svg", "name": "Qwen"},
64
- "alibaba": {"path": "assets/logo-qwen.svg", "name": "Qwen"},
65
- "kimi": {"path": "assets/logo-moonshot.svg", "name": "Moonshot"},
66
- "moonshot": {"path": "assets/logo-moonshot.svg", "name": "Moonshot"},
67
- "minimax": {"path": "assets/logo-minimax.svg", "name": "MiniMax"},
68
- }
69
-
70
-
71
- def get_company_from_model(model_name: str) -> dict:
72
- """
73
- Gets the company info (logo path and name) from a model name.
74
- Returns default unknown logo if no match found.
75
- """
76
- if not model_name:
77
- return {"path": "assets/logo-unknown.svg", "name": "Unknown"}
78
-
79
- # Handle list of models - use the first one
80
- if isinstance(model_name, list):
81
- model_name = model_name[0] if model_name else ""
82
-
83
- model_lower = str(model_name).lower()
84
-
85
- # Check each pattern
86
- for pattern, company_info in COMPANY_LOGO_MAP.items():
87
- if pattern in model_lower:
88
- return company_info
89
-
90
- return {"path": "assets/logo-unknown.svg", "name": "Unknown"}
91
-
92
 
93
  def get_company_logo_html(model_name: str) -> str:
94
  """
@@ -135,26 +87,6 @@ def get_svg_as_data_uri(path: str) -> str:
135
  print(f"Warning: SVG file not found at {path}")
136
  return ""
137
 
138
- def create_svg_html(value, svg_map):
139
- """
140
- Generates the absolute simplest HTML for an icon, without any extra text.
141
- This version is compatible with gr.DataFrame.
142
- """
143
- if pd.isna(value) or value not in svg_map:
144
- return ""
145
-
146
- path_info = svg_map[value]
147
- # Handle both old string format and new object format
148
- if isinstance(path_info, dict):
149
- path = path_info["path"]
150
- else:
151
- path = path_info
152
-
153
- src = get_svg_as_data_uri(path)
154
- # Generate the HTML for the single icon, with NO text.
155
- if src:
156
- return f'<img src="{src}" style="width: 16px; height: 16px; vertical-align: middle;" alt="{value}" title="{value}">'
157
- return ""
158
 
159
  def build_openness_tooltip_content() -> str:
160
  """
@@ -1355,23 +1287,3 @@ def create_sub_navigation_bar(tag_map: dict, category_name: str, validation: boo
1355
 
1356
  # Return the entire navigation bar as one single Gradio HTML component
1357
  return gr.HTML(full_html)
1358
-
1359
- def format_llm_base_with_html(value):
1360
- """
1361
- Formats the 'Models Used' cell value.
1362
- If the value is a list with more than 1 element, it returns an
1363
- HTML <span> with the full list in a hover-over tooltip.
1364
- If it's a single-element list, it returns just that element.
1365
- Otherwise, it returns the original value.
1366
- """
1367
- if isinstance(value, list):
1368
- if len(value) > 1:
1369
- # Join the list items with a newline character for a clean tooltip
1370
- tooltip_text = "\n".join(map(str, value))
1371
- # Return an HTML span with the title attribute for the tooltip
1372
- return f'<span class="tooltip-icon cell-tooltip-icon" style="cursor: help;" data-tooltip="{tooltip_text}">{value[0]} (+ {len(value) - 1}) ⓘ</span>'
1373
- if len(value) == 1:
1374
- # If only one item, just return that item
1375
- return value[0]
1376
- # Return the value as-is if it's not a list or is an empty list
1377
- return value
 
20
  format_runtime_column,
21
  get_pareto_df,
22
  clean_llm_base_list,
23
+ get_company_from_model,
24
+ COMPANY_LOGO_MAP,
25
  )
26
  from config import (
27
  CONFIG_NAME,
 
41
  api = HfApi()
42
  os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def get_company_logo_html(model_name: str) -> str:
46
  """
 
87
  print(f"Warning: SVG file not found at {path}")
88
  return ""
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  def build_openness_tooltip_content() -> str:
92
  """
 
1287
 
1288
  # Return the entire navigation bar as one single Gradio HTML component
1289
  return gr.HTML(full_html)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
visualizations.py CHANGED
@@ -9,8 +9,9 @@ import os
9
  import base64
10
  import aliases
11
 
12
- # Import company logo mapping from ui_components
13
- from ui_components import get_company_from_model, get_svg_as_data_uri
 
14
 
15
  # Standard layout configuration matching existing charts
16
  # Colors aligned with OpenHands brand
 
9
  import base64
10
  import aliases
11
 
12
+ # Import company logo mapping from leaderboard_transformer
13
+ from leaderboard_transformer import get_company_from_model
14
+ from ui_components import get_svg_as_data_uri
15
 
16
  # Standard layout configuration matching existing charts
17
  # Colors aligned with OpenHands brand